# Linear profiles with SequenceOptimizer

This notebook demonstrates how to use the linear-profile functionality of `SequenceOptimizer` in GOOSE. In this demo, we will:
- Create sequences whose sliding-window property profile is optimized to match that of a starting (reference) sequence
- Create sequences whose sliding-window property profile is optimized towards an arbitrary, user-defined profile
- Visualize the results


In [None]:
import goose
from goose.optimize import SequenceOptimizer
import sparrow
from sparrow import Protein
import numpy as np
import matplotlib.pyplot as plt
import random

# Example 1: Matching the linear profile of NCPR in the G3BP1 IDR

In [None]:
# G3BP1 protein sequence in one-letter amino-acid code.
g3bp1='MVMEKPSPLLVGREFVRQYYTLLNQAPDMLHRFYGKNSSYVHGGLDSNGKPADAVYGQKEIHRKVMSQNFTNCHTKIRHVDAHATLNDGVVVQVMGLLSNNNQALRRFMQTFVLAPEGSVANKFYVHNDIFRYQDEVFGGFVTEPQEESEEEVEEPEERQQTPEVVPDDSGTFYDQAVVSNDMEEHLEEPVAEPEPDPEPEPEQEPVSEIQEEKPEPVLEETAPEDAQKSSSPAPADIAQTVQEDLRTFSWASVTSKNLPPSGAVPVTGIPPHVVKVPASQPRPESKPESQIPPQRPQRDQRVREQRINIPPQRGPRPIREAGEQGDIEPRRMVRHPDSHQLFIGNLPHEVDKSELKDFFQSYGNVVELRINSGGKLPNFGFVVFDDSEPVQKVLSNRPIMFRGEVRLNVEEKKTRAAREGDRRDNRLRGPGGPRGGLGGGMRGPPRGGMVQKPGFGVGRGLAPRQ'
# Reference IDR for Example 1. The slice [130:342] is 0-based and end-exclusive,
# so it keeps 212 residues (positions 131-342 in 1-based numbering).
g3bp1_idr = g3bp1[130:342] # predicted from metapredict v3

In [None]:
# Design a sequence with the same number of residues as the G3BP1 IDR.
optimizer = SequenceOptimizer(target_length=len(g3bp1_idr), max_iterations=4000)

# Objective 1: NCPR evaluated as a linear (sliding-window) profile instead of a
# single whole-sequence value. The reference profile is derived from g3bp1_idr
# using a 5-residue window.
# NOTE(review): tolerance=0.05 is presumably the accepted deviation from the
# reference profile - confirm against the GOOSE documentation.
optimizer.add_property(
    goose.NCPR,
    target_sequence=g3bp1_idr,
    calculate_as_linear_profile=True,
    window_size=5,
    tolerance=0.05,
)

# Objective 2: fraction-disorder target of 1
# (presumably "fully predicted disordered" - confirm).
optimizer.add_property(goose.FractionDisorder, target_value=1)

# Run the optimization; the result is sliced like a string by the plotting cell.
test_seq = optimizer.run()

In [None]:
# Recompute both NCPR profiles with a 5-residue sliding window (the same width
# passed to the optimizer as window_size) so the curves are directly comparable.
# A sequence of length n yields n - window + 1 windows.
window = 5
g3bp1_profile = [
    Protein(g3bp1_idr[start:start + window]).NCPR
    for start in range(len(g3bp1_idr) - window + 1)
]
var_profile = [
    Protein(test_seq[start:start + window]).NCPR
    for start in range(len(test_seq) - window + 1)
]

# Overlay the reference (blue) and designed (orange) profiles on a single Axes.
fig, ax1 = plt.subplots(1, 1, figsize=(5, 3), sharex=True, sharey=True)
ax1.plot(g3bp1_profile, color='blue', label='G3BP1 NCPR profile')
ax1.plot(var_profile, color='orange', label='Variant sequence NCPR profile')
ax1.set(ylabel='NCPR', xlabel='Residue index')
ax1.legend()
fig.suptitle('NCPR profiles of G3BP1 IDR and variant sequence')
fig.tight_layout()
plt.show()

# Example 2: Matching the NCPR to a cosine wave profile

In [None]:
def create_cosine_wave(duration, frequency, sample_rate):
    """
    Sample a unit-amplitude, zero-phase cosine wave on an evenly spaced time grid.

    Args:
        duration (float): Total length of the wave in seconds.
        frequency (float): Frequency of the wave in Hertz (Hz).
        sample_rate (int): Number of samples taken per second.

    Returns:
        tuple: ``(time, amplitude)`` - the sample times and the value
        ``cos(2 * pi * frequency * t)`` at each of those times. Both arrays
        hold ``int(duration * sample_rate)`` entries.
    """
    # Truncate to a whole number of samples.
    n_points = int(duration * sample_rate)

    # endpoint=False leaves out t == duration, so the grid covers [0, duration).
    t = np.linspace(0, duration, n_points, endpoint=False)

    # General form is A * cos(2 * pi * f * t + phase); here A = 1 and phase = 0.
    angular_frequency = 2 * np.pi * frequency
    wave = np.cos(angular_frequency * t)

    return t, wave

In [None]:
# 1 s sampled at 100 points/s gives 100 samples (matching the 100-residue design
# in this example); at 2 Hz the wave completes two full periods over that span.
time, amplitude = create_cosine_wave(duration=1, frequency=2, sample_rate=100)

In [None]:
# Create the optimizer for a 100-residue sequence. This uses the SequenceOptimizer
# name imported at the top of the notebook, the same entry point as Example 1.
optimizer = SequenceOptimizer(target_length=100, verbose=True,
                              max_iterations=2000)

# NCPR linear-profile objective: drive the sliding-window NCPR towards the
# cosine wave stored in `amplitude`.
# NOTE(review): window_size is not passed here, so the library default applies;
# the plotting cell recomputes the profile with a 5-residue window - confirm
# that the default matches.
optimizer.add_property(goose.NCPR,
                       calculate_as_linear_profile=True,
                       target_profile=amplitude)

# Disorder objective: fraction-disorder target of 1
# (presumably "fully predicted disordered" - confirm).
optimizer.add_property(goose.FractionDisorder,
                       target_value=1)

# Run the optimization; `var` is the designed sequence used by the plotting cell.
var = optimizer.run()


In [None]:
# Sliding-window NCPR of the designed sequence, using a 5-residue window.
window = 5
var_profile = [
    Protein(var[start:start + window]).NCPR
    for start in range(len(var) - window + 1)
]

# The windowed profile has window - 1 fewer points than the target, so trim
# window // 2 points from each edge of the target to get equal lengths.
edge = window // 2
target_trimmed = amplitude[edge:len(amplitude) - edge]

fig, ax1 = plt.subplots(1, 1, figsize=(5, 3), sharex=True, sharey=True)
ax1.plot(target_trimmed, color='blue', label='Target')
ax1.plot(var_profile, color='orange', label='Variant')
ax1.set(ylabel='NCPR', xlabel='Residue index')
ax1.legend()
fig.suptitle('NCPR profiles variant matched to cosine wave')
fig.tight_layout()
plt.show()