# Tool Flow
For each user input (an EID or an amino-acid sequence):  
  
Functions in this notebook and what they wrap:  
`predict_log2fc()`  
1. `usr_seq()`  
    a. `get_seq()`  
  
2. `get_feat()`  
    a. `get_prot_mass()`  
    b. `get_biopy_feat()`  
    c. `count_aa_types()`  
  
3. `scale_input_feat()`    
    
4. `bagging_regr()` - the modeling function that outputs the predicted log2fc  
5. `vizualization_thing_here()`

`multi_pred_log2fc()`

## Input EID or sequence

In [None]:
# Query handed to the tool. `eid` is not defined in this cell -- presumably an
# Entrez ID assigned elsewhere in the notebook (TODO confirm).
user_input = eid

## The `usr_seq` Function <span style="color:red">--NEED ENTREZ `get_seq()` FUNCTION HERE</span>

In [2]:
def usr_seq(user_input, email):
    """Return the sequence to analyse for a user query.

    If ``user_input`` is an integer it is treated as an EID and the sequence
    is fetched with ``get_seq(user_input, email)``. Any other input (e.g. a
    sequence string) is returned unchanged.

    Parameters:
        user_input: an integer EID, or the sequence itself.
        email: forwarded to ``get_seq`` for EID lookups; unused otherwise.

    Returns:
        The result of ``get_seq`` for an integer input, else ``user_input``.
    """
    # bool is a subclass of int, but True/False are not EIDs; keep passing
    # them through untouched like every other non-integer input.
    if isinstance(user_input, int) and not isinstance(user_input, bool):
        return get_seq(user_input, email)

    return user_input

In [4]:
def test_usr_seq():
    """Exercise both branches of ``usr_seq``: EID lookup and pass-through."""
    # usr_seq requires an email argument (forwarded to get_seq for EID
    # lookups); a placeholder address is used here.
    email = 'user@example.com'

    # Case 1: input is EID -> the looked-up sequence should be a string
    usr_in = 15599626
    result = usr_seq(usr_in, email)
    assert isinstance(result, str), 'the usr_seq function does not work'

    # Case 2: input is sequence -> returned unchanged
    usr_in = 'juliaisthecoolest'
    result = usr_seq(usr_in, email)
    assert result == usr_in, 'the usr_seq function does not work'

## The `get_feat` function

In [None]:
def get_feat(user_sequence):
    """
    Build the feature table for the sequence the user is querying.

    Calls ``get_prot_mass``, ``get_biopy_feat`` and ``count_aa_types`` on
    ``user_sequence`` and returns a single-row Pandas DataFrame with the
    columns 'AA_NP', 'AA_POS', 'AA_NEG', 'AA_POL', 'MW', 'AROM', 'ISO_E'.
    """

    mass = get_prot_mass(user_sequence)

    # The first value returned by get_biopy_feat is not used as a feature;
    # the 'MW' column is filled from get_prot_mass instead.
    _molwt_biopy, aromaticity, isoelctric_pt = get_biopy_feat(user_sequence)

    nonpolar, positive, polar, negative = count_aa_types(user_sequence)

    # Values are listed in the same order as column_names.
    column_names = ['AA_NP', 'AA_POS', 'AA_NEG', 'AA_POL', 'MW', 'AROM', 'ISO_E']
    feat_list = [nonpolar, positive, negative, polar, mass, aromaticity, isoelctric_pt]

    # Wrap the values in an outer list so they form ONE row of seven columns;
    # a flat list would be read as seven rows of one column and clash with
    # the seven column names.
    return pd.DataFrame([feat_list], columns=column_names)

# Overall tool function:

In [1]:
def predict_log2fc(user_input, email):
    """
    This function takes the user input of either a protein accession number or
    an amino acid sequence and predicts how much the sequence will change after
    being in space (near-zero gravity and increased radiation exposure).

    Parameters:
        user_input: an integer EID or a sequence; resolved by ``usr_seq``.
        email: forwarded to ``usr_seq`` for EID lookups.

    Returns:
        The value returned by ``bagging_regr`` (also printed).
    """

    # Get user sequence (email is required by usr_seq for EID lookups)
    user_sequence = usr_seq(user_input, email)

    # Get features; get_feat already returns them as a DataFrame
    user_features = get_feat(user_sequence)

    # Scale features
    scaled_features = scale_input_feat(user_features)

    # Predict log2fc
    # NOTE(review): test_ratio, n_estim, seed_random, X and y are not defined
    # in this function and must exist at notebook level; scaled_features is
    # computed above but never handed to bagging_regr -- confirm how the
    # model is meant to receive the query features.
    prediction = bagging_regr(test_ratio, n_estim, seed_random, X, y)

    print('The predicted log2fc is ', prediction)

    return prediction

## Do multiple predictions

In [None]:
def multi_pred_log2fc(user_inputs, email):
    """Input the proteins of interest as a list.  This function will iterate through
    and return a list of predicted log2fc's.

    Parameters:
        user_inputs: iterable of queries, each passed to ``predict_log2fc``.
        email: forwarded unchanged to every ``predict_log2fc`` call.

    Returns:
        A list with one ``predict_log2fc`` result per query, in input order
        (empty when ``user_inputs`` is empty).
    """
    # Iterate over the `user_inputs` parameter itself, not a notebook-level
    # `user_input` variable.
    return [predict_log2fc(query, email) for query in user_inputs]