In [None]:
import numpy as np
from scipy import stats

In [None]:
# Two independent samples of 10 integers drawn uniformly from [0, 10).
# A seeded Generator makes the samples (and the t-test run on them) identical
# on every "Restart Kernel -> Run All". The former np.array() wrapper was
# redundant because the sampler already returns an ndarray.
rng = np.random.default_rng(42)
a = rng.integers(0, 10, size=10)
b = rng.integers(0, 10, size=10)
print(a, b)

In [None]:
# Independent two-sample t-test comparing the means of `a` and `b`
# (scipy's default treats the two samples as having equal variance).
t_stat, p_val = stats.ttest_ind(a, b)

In [None]:
# Show the test statistic and the p-value as the cell output.
t_stat, p_val

In [None]:
import pandas as pd

In [None]:
# Toy frame: the two random samples plus a binary label
# (first five rows labelled 0, last five labelled 1).
df = pd.DataFrame({
    'a': a,
    'b': b,
    'label': [0] * 5 + [1] * 5,
})

In [None]:
# Add a readable group name derived from the numeric label (0 -> 'a', 1 -> 'b').
df['name'] = df['label'].map({0: 'a', 1: 'b'})

In [None]:
# Name attached to label 0, shown as a plain Python string.
str(df.loc[df['label'] == 0, 'name'].unique()[0])

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib widget


def lorentzian(x, center, intensity, width):
    """Evaluate a Lorentzian line shape at ``x``.

    Computes ``intensity / (1 + ((x - center) / width) ** 2)``, so the value is
    ``intensity`` at ``x == center`` and half of that at ``center +/- width``.

    Parameters
    ----------
    x : float or numpy.ndarray
        Position(s) at which to evaluate the peak.
    center : float
        Position of the peak maximum.
    intensity : float
        Peak height at ``center``.
    width : float
        Distance from ``center`` at which the peak falls to half height.

    Returns
    -------
    float or numpy.ndarray
        Peak value(s), with the same shape as ``x``.
    """
    scaled_offset = (x - center) / width
    return intensity / (1 + scaled_offset ** 2)

def simulate_spectrum(chemical_shift, intensities, widths, ppm_range=(-1,10), points=1000):
    """Synthesise a spectrum as a sum of Lorentzian peaks on a regular ppm grid.

    Parameters
    ----------
    chemical_shift, intensities, widths : sequence of float
        Parallel sequences giving, for each peak, the centre, height and width
        handed to ``lorentzian``. One peak is added per entry of
        ``chemical_shift``.
    ppm_range : tuple of float, optional
        ``(start, stop)`` of the axis; both end points are included.
    points : int, optional
        Number of evenly spaced samples on the axis.

    Returns
    -------
    ppm_values : numpy.ndarray
        The ``points`` axis positions.
    spectrum : numpy.ndarray
        Summed peak values at each axis position.
    """
    axis_start, axis_stop = ppm_range[0], ppm_range[1]
    ppm_values = np.linspace(axis_start, axis_stop, points)
    spectrum = np.zeros_like(ppm_values)

    # Accumulate the peaks one at a time, in the order they were given.
    for peak, center in enumerate(chemical_shift):
        spectrum += lorentzian(ppm_values, center, intensities[peak], widths[peak])

    return ppm_values, spectrum

# ---------------------------------------------------------------------------
# Simulate 80 synthetic spectra: 40 "Disease" samples followed by 40 "Healthy".
#
# Fixes compared with the previous version of this cell:
#   * the group split used `i <= 40`, which generated 41 disease-profile
#     spectra while the labels below assume exactly 40 per group;
#   * the unknown_1 / unknown_2 intensities were derived from the already
#     rescaled alanine intensities instead of their own base intensities;
#   * spectra are collected in a list and stacked once instead of growing a
#     DataFrame with pd.concat inside the loop (and then dropping the stray
#     index column that produced);
#   * a seeded generator makes the simulated data reproducible.
# ---------------------------------------------------------------------------
N_PER_GROUP = 40
GROUPS = ('Disease', 'Healthy')
spectra_rng = np.random.default_rng(42)

# name -> (chemical shifts, base intensities, line widths); one entry per peak.
PEAKS = {
    'tsp': ([-0.005, 0, 0.005],
            [50000, 1000000, 50000],
            [0.001, 0.0015, 0.001]),
    'lactate': ([1.230, 1.238, 4.100, 4.108, 4.116, 4.124],
                [2516000, 2515000, 1501600, 2015000, 2015000, 1501600],
                [0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005]),
    'acetate': ([1.910],
                [5802400],
                [0.0005]),
    'alanine': ([1.424, 1.432, 3.758, 3.766, 3.774, 3.782],
                [1954800, 1954800, 1386492.54, 1857900, 1857900, 1386492.54],
                [0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005]),
    'unknown_1': ([3.424, 3.432, 7.758, 8.566, 5.774, 4.782],
                  [1954800, 1954800, 1386492.54, 1857900, 1857900, 1386492.54],
                  [0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005]),
    'unknown_2': ([2.424, 2.432, 6.758, 5.766, 8.774, 9.782],
                  [1954800, 1954800, 1386492.54, 1857900, 1857900, 1386492.54],
                  [0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0005]),
}

# Every peak intensity of a listed compound is multiplied by (u + offset) with
# u drawn uniformly from [0, 1). Compounds not listed (tsp) keep their base
# intensities unchanged.
SCALE_OFFSET = {
    'Disease': {'lactate': 5, 'acetate': 2, 'alanine': 5, 'unknown_1': 1, 'unknown_2': 1},
    'Healthy': {'lactate': 2, 'acetate': 2, 'alanine': 2, 'unknown_1': 5, 'unknown_2': 5},
}

spectra = []
for group in GROUPS:
    offsets = SCALE_OFFSET[group]
    for _ in range(N_PER_GROUP):
        chemical_shifts, intensities, widths = [], [], []
        for name, (shifts, base_intensities, peak_widths) in PEAKS.items():
            if name in offsets:
                scaled = [value * (spectra_rng.random() + offsets[name])
                          for value in base_intensities]
            else:
                scaled = list(base_intensities)
            chemical_shifts += shifts
            intensities += scaled
            widths += peak_widths

        ppm_values, spectrum = simulate_spectrum(chemical_shifts, intensities, widths)
        spectra.append(spectrum)

# One row per sample, one column per ppm position.
df = pd.DataFrame(np.vstack(spectra), columns=ppm_values)

# Sample metadata; row order matches the generation order above.
df['Group'] = [group for group in GROUPS for _ in range(N_PER_GROUP)]
half_group = N_PER_GROUP // 2
df['Time point'] = (['Pre-op'] * half_group + ['Post-op'] * half_group) * len(GROUPS)

In [None]:
# Split the simulated table: the two trailing columns ('Group', 'Time point')
# are metadata, everything before them is spectral intensity.
ppm = df.columns[:-2].astype(float).to_numpy()
X = df.iloc[:, :-2]
y = df.loc[:, 'Group']

In [None]:
from lingress import lin_regression


In [None]:
# Build the lingress regression object on the simulated spectra. The group
# column is passed both as the regression target and as the sample label.
# NOTE(review): argument semantics are defined by the lingress package - confirm there.
test = lin_regression(x=X, target=y, label=y, features_name=ppm)

In [None]:
# Dataset-preparation step of the lingress object.
# NOTE(review): what exactly it builds is defined by lingress - confirm in its docs.
test.create_dataset()

In [None]:
# Fit step; presumably must run before the plot/report calls below - TODO confirm in lingress.
test.fit_model()

In [None]:
# Library-provided volcano plot with both cut-offs set to 1.
# NOTE(review): confirm in lingress on which scale each cut-off is interpreted.
test.volcano_plot(p_val_cut_off=1, fc_cut_off=1)

In [None]:
import numpy as np
# Show the report produced by the lingress object (content defined by lingress).
test.report()

In [None]:
# Settings for the hand-built volcano plot in the cells below.
# Group name used for features whose Log2 FC is >= fc_cut_off.
label_a = 'A'
# Group name used for features whose Log2 FC is <= -fc_cut_off.
label_b = 'B'
# Compared against the -log10(p-value) column, so 1 corresponds to p = 0.1.
p_val_cut_off = 1
# Threshold on the Log2 FC column, applied symmetrically (+/-).
fc_cut_off = 1

In [None]:
# Collect the per-feature statistics held by the fitted lingress object and
# assemble them into one table for the volcano plot.
log2_fc, pval, beta = test.l2_df2, test.pval_df, test.beta_df

# Put p-values on the -log10 scale used for the volcano y-axis.
log10_p = -np.log10(pval)
log10_p.columns = ["-Log10 P-value"]

df_vol = pd.concat([log10_p, log2_fc, beta], axis=1)
df_vol.columns = ["-Log10 P-value", "Log2 FC", "Beta"]

In [None]:
# Display the assembled volcano table (one row per feature).
df_vol

In [None]:
# Classify every feature for the volcano plot.
#
# A feature is "High in <label_a>" / "High in <label_b>" only when it passes
# the p-value cut-off AND its Log2 FC is beyond +fc_cut_off / -fc_cut_off;
# everything else is "Under cut off". The classification is vectorised
# (previously a row-by-row iterrows loop) and gives the same labels.
passes_p = (df_vol['-Log10 P-value'] >= p_val_cut_off).to_numpy()
fold_change = df_vol['Log2 FC'].to_numpy()

# Direction of the fold change; the first matching condition wins.
fc_label = np.select(
    [fold_change >= fc_cut_off, fold_change <= -fc_cut_off],
    [f"High in {label_a}", f"High in {label_b}"],
    default="Under cut off",
)

# Per-criterion view, kept for inspection.
cutoff_ = pd.DataFrame({
    'p-value cut off': np.where(passes_p, "Pass", "Under cut off"),
    'FC cut off': fc_label,
})

# Combined verdict: the fold-change label counts only if the p-value passes.
report_ = np.where(passes_p, fc_label, "Under cut off").tolist()

df_vol["Threshold"] = report_
df_vol['Features'] = df_vol.index

In [None]:
# Keep the Beta column of the volcano table under a short scratch name.
test__ = df_vol['Beta']

In [None]:
# Display the Beta column.
test__

In [None]:
def plot(show_line=True):
    """Build the interactive volcano plot from the module-level ``df_vol`` table.

    Reads ``df_vol`` (columns "Log2 FC", "-Log10 P-value", "Beta", "Features",
    "Threshold") together with ``label_a``, ``label_b``, ``p_val_cut_off`` and
    ``fc_cut_off`` from the notebook namespace.

    Parameters
    ----------
    show_line : bool, optional
        When true, draw dotted red guide lines at the p-value cut-off and at
        +/- the fold-change cut-off.

    Returns
    -------
    plotly.graph_objects.Figure
        The scatter figure; points are coloured by their "Threshold" class.
    """
    import plotly.express as px

    high_a = f"High in {label_a}"
    high_b = f"High in {label_b}"

    fig = px.scatter(
        df_vol, x="Log2 FC", y="-Log10 P-value", height=900, width=1600,
        color="Threshold",
        color_discrete_map={high_a: "#E02000",
                            high_b: "#203E8A",
                            "Under cut off": "#D9D9D9"},
        labels={"-Log10 P-value": "-log<sub>10</sub> (<i>p-value</i>)",
                "Log2 FC": "Log<sub>2</sub> <i>Fold change</i>"},
        hover_data={"-Log10 P-value": True, "Log2 FC": True, "Beta": True,
                    "Features": True, "Threshold": True},
    )

    # One hover line per entry. The previous template was missing a comma after
    # the "Features" entry, which fused it with a leftover debug f-string that
    # pasted the repr of the whole Beta Series into every hover label; the
    # per-point Beta value is already shown through customdata[0].
    # NOTE(review): the customdata indices rely on plotly express ordering the
    # non-x/y hover_data columns as Beta, Features, Threshold.
    fig.update_traces(hovertemplate="<br>".join([
        "Cut off: %{customdata[2]}",
        "Log2 FC: %{x}",
        "-Log10 P-value: %{y}",
        "Beta: %{customdata[0]}",
        "Features: %{customdata[1]}",
    ]))

    fig.update_layout(
        title={
            'text': "<b>Volcano plot of {} vs {}</b>".format(label_a, label_b),
            'y': 0.98,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'})

    # Hide legend
    fig.update_traces(showlegend=False)
    # Text label position (the former trailing `.data[0]` was a no-op expression)
    fig.update_traces(textposition='top center')

    if show_line:
        guide_style = dict(color='red', width=2, dash='dot')
        # Horizontal line at the p-value cut-off
        fig.add_shape(type='line', x0=-10, y0=p_val_cut_off, x1=10, y1=p_val_cut_off,
                      line=guide_style)
        # Vertical lines at -/+ the fold-change cut-off
        fig.add_shape(type='line', x0=-fc_cut_off, y0=0, x1=-fc_cut_off, y1=10,
                      line=guide_style)
        fig.add_shape(type='line', x0=fc_cut_off, y0=0, x1=fc_cut_off, y1=10,
                      line=guide_style)

    return fig

In [None]:
# Render the volcano plot with the guide lines enabled (show_line defaults to True).
plot()

In [None]:
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris

In [None]:
# Switch to the iris data for the Dash/PCA demo. This rebinds X and y, which
# until now held the simulated spectra and their group labels.
iris = load_iris()
X = iris.data
y = iris.target

In [None]:
# Create the Dash application object that the layout and callback below attach to.
app = dash.Dash(__name__)

In [None]:
# Page layout: one dropdown per axis for picking a principal component
# (stored as a zero-based index), followed by the graph the callback fills in.
pc_options = [{'label': f'PC{i+1}', 'value': i} for i in range(X.shape[1])]

app.layout = html.Div([
    html.Label('Select Principal Component for x-axis'),
    dcc.Dropdown(id='x-axis-dropdown', options=pc_options, value=0),
    html.Label('Select Principal Component for y-axis'),
    dcc.Dropdown(id='y-axis-dropdown', options=pc_options, value=1),
    dcc.Graph(id='scatter-plot'),
])

In [None]:
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('x-axis-dropdown', 'value'),
     Input('y-axis-dropdown', 'value')]
)
def update_scatter_plot(x_axis_component, y_axis_component):
    """Redraw the PCA scatter plot for the components picked in the dropdowns.

    Parameters
    ----------
    x_axis_component, y_axis_component : int
        Zero-based index of the principal component to put on each axis.

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter plot of the two selected score columns, coloured by ``y``.
    """
    # PCA cannot return more components than min(n_samples, n_features). The
    # previous hard-coded n_components=10 raised ValueError on the 4-feature
    # iris data, so the callback never produced a figure.
    pca = PCA(n_components=min(X.shape))
    X_pca = pca.fit_transform(X)
    fig = px.scatter(x=X_pca[:, x_axis_component], y=X_pca[:, y_axis_component], color=y,
                     labels={'x': f'PC {x_axis_component+1}',
                             'y': f'PC {y_axis_component+1}'})
    fig.update_layout(title='PCA of Iris dataset')

    return fig

In [None]:
# Start the Dash development server with debug tooling on and the
# auto-reloader off (presumably because the reloader does not work from a
# notebook kernel - TODO confirm).
# NOTE(review): newer Dash releases replace run_server() with run(); check
# against the installed Dash version.
if __name__ == '__main__':
    app.run_server(debug=True, use_reloader=False)

In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
import pandas as pd

# Sample data: the iris measurements and their class labels
iris = load_iris()
X, y = iris.data, iris.target

# Full PCA: with no n_components given, every available component is kept
pca = PCA()
X_pca = pca.fit_transform(X)
n_components = min(X.shape)  # number of components kept: min(n_samples, n_features)

# Score table: one column per component plus the class label
df = pd.DataFrame(X_pca, columns=[f'PC{i+1}' for i in range(n_components)])
df['Target'] = y

# Dash application that the layout and callback below attach to
app = dash.Dash(__name__)

# Layout: the graph first, then one dropdown per axis listing every score column
component_options = [{'label': f'PC{i+1}', 'value': f'PC{i+1}'} for i in range(n_components)]

app.layout = html.Div([
    dcc.Graph(id='scatter-plot'),
    html.Label('Select components to display:'),
    dcc.Dropdown(id='component-dropdown-x', options=component_options, value='PC1'),
    dcc.Dropdown(id='component-dropdown-y', options=component_options, value='PC2'),
])

# Callback: redraw the scatter plot whenever either component dropdown changes
@app.callback(
    Output('scatter-plot', 'figure'),
    [Input('component-dropdown-x', 'value'),
     Input('component-dropdown-y', 'value')]
)
def update_scatter_plot(selected_component_x, selected_component_y):
    """Return a scatter plot of the two selected PCA score columns of ``df``.

    Both arguments are column names of ``df`` (e.g. ``'PC1'``); points are
    coloured by the ``'Target'`` column, shown under the label 'Species'.
    """
    plot_title = f'Scatter Plot of PCA Scores ({selected_component_x} vs {selected_component_y})'
    return px.scatter(
        df,
        x=selected_component_x,
        y=selected_component_y,
        color='Target',
        title=plot_title,
        labels={'Target': 'Species'},
    )

# Run the app on the Dash development server with debug tooling enabled.
# NOTE(review): unlike the earlier cell this does not pass use_reloader=False,
# and newer Dash releases replace run_server() with run() - confirm both
# against the installed Dash version.
if __name__ == '__main__':
    app.run_server(debug=True)


In [None]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris
import pandas as pd


# Sample data: the iris measurements and their class labels
iris = load_iris()
X, y = iris.data, iris.target

# Full PCA: with no n_components given, every available component is kept
pca = PCA()
X_pca = pca.fit_transform(X)
n_components = min(X.shape)  # number of components kept: min(n_samples, n_features)

# Score table: one column per component plus the class label
df = pd.DataFrame(X_pca, columns=[f'PC{i+1}' for i in range(n_components)])
df['Target'] = y


# Dash application that the layout and callbacks below attach to
app = dash.Dash(__name__)


# Layout: the 2D and 3D graphs, then the component dropdowns for each of them
def _pc_dropdown(dropdown_id, default):
    """Dropdown listing every principal-component column, preselecting ``default``."""
    return dcc.Dropdown(
        id=dropdown_id,
        options=[{'label': f'PC{i+1}', 'value': f'PC{i+1}'} for i in range(n_components)],
        value=default,
    )


app.layout = html.Div([
    dcc.Graph(id='scatter-plot-2d'),
    dcc.Graph(id='scatter-plot-3d'),
    html.Label('Select components for 2D scatter plot:'),
    _pc_dropdown('component-dropdown-x-2d', 'PC1'),
    _pc_dropdown('component-dropdown-y-2d', 'PC2'),
    html.Label('Select components for 3D scatter plot:'),
    _pc_dropdown('component-dropdown-x-3d', 'PC1'),
    _pc_dropdown('component-dropdown-y-3d', 'PC2'),
    _pc_dropdown('component-dropdown-z-3d', 'PC3'),
])



# Callback: redraw the 2D scatter plot when either of its dropdowns changes
@app.callback(
    Output('scatter-plot-2d', 'figure'),
    [Input('component-dropdown-x-2d', 'value'),
     Input('component-dropdown-y-2d', 'value')]
)
def update_scatter_plot_2d(selected_component_x, selected_component_y):
    """Return a 2D scatter plot of the two selected PCA score columns of ``df``.

    Both arguments are column names of ``df`` (e.g. ``'PC1'``); points are
    coloured by the ``'Target'`` column, shown under the label 'Species'.
    """
    plot_title = f'Scatter Plot of PCA Scores ({selected_component_x} vs {selected_component_y})'
    return px.scatter(
        df,
        x=selected_component_x,
        y=selected_component_y,
        color='Target',
        title=plot_title,
        labels={'Target': 'Species'},
    )

# Callback: redraw the 3D scatter plot when any of its three dropdowns changes
@app.callback(
    Output('scatter-plot-3d', 'figure'),
    [Input('component-dropdown-x-3d', 'value'),
     Input('component-dropdown-y-3d', 'value'),
     Input('component-dropdown-z-3d', 'value')]
)
def update_scatter_plot_3d(selected_component_x, selected_component_y, selected_component_z):
    """Return a 3D scatter plot of the three selected PCA score columns of ``df``.

    All arguments are column names of ``df`` (e.g. ``'PC1'``); points are
    coloured by the ``'Target'`` column, shown under the label 'Species'.
    """
    return px.scatter_3d(
        df,
        x=selected_component_x,
        y=selected_component_y,
        z=selected_component_z,
        color='Target',
        title='3D Scatter Plot of PCA Scores',
        labels={'Target': 'Species'},
    )


# Start the Dash development server on port 8050 with debug tooling enabled.
# NOTE(review): newer Dash releases replace run_server() with run() - confirm
# against the installed Dash version.
if __name__ == '__main__':
    app.run_server(debug=True, port=8050)

In [None]:

    
from flask import send_file

# Run the app
# NOTE(review): run_server() presumably blocks until the server is stopped, so
# the export below would only execute afterwards - confirm this is intended.
if __name__ == '__main__':
    app.run_server(debug=False)

# Export the Dash app to an HTML file: request the index page through Flask's
# test client (no real HTTP server involved) and write the raw response bytes.
# NOTE(review): the saved file is whatever the server returns for '/'; verify
# that it is usable on its own, without the Dash server running.
client = app.server.test_client()
response = client.get('/')
with open('dashboard.html', 'wb') as file:
    file.write(response.data)


In [3]:

from metbit import pca, opls_da
from sklearn.datasets import load_iris
import pandas as pd




# Load sample data
iris = load_iris()

In [4]:

# Iris measurements as a labelled table, with the class id in a trailing 'target' column.
df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
df['target'] = iris['target']


In [5]:
# Every column except the trailing 'target' is a feature; 'target' is the class id.
features = df.columns[:-1]
X = df.iloc[:, :-1]
y = df.loc[:, 'target']

In [6]:
# Build the metbit PCA object from the feature table, the class labels and the
# feature names (as a plain list). X is already a DataFrame here, so the
# pd.DataFrame(...) call wraps it again.
# NOTE(review): argument semantics are defined by metbit - confirm there.
pca_ = pca(X=pd.DataFrame(X), label=y, features_name=features.tolist())

In [7]:
# Fit step of the metbit PCA object; presumably required before the plotting calls - TODO confirm.
pca_.fit()

In [8]:
# NOTE: the recorded run of this cell failed with
# "Mime type rendering requires nbformat>=4.2.0 but it is not installed";
# the kernel environment needs nbformat>=4.2.0 for this figure to render.
pca_.plot_cumulative_observed()

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [9]:
# NOTE: the recorded run of this cell failed with
# "Mime type rendering requires nbformat>=4.2.0 but it is not installed";
# the kernel environment needs nbformat>=4.2.0 for this figure to render.
pca_.plot_loading_()

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [10]:
# NOTE(review): the recorded run of this cell raised a ValueError because a
# 'Features' hover column was requested while the plotted frame only had
# 'PC1', 'PC2', 'Group' and 'Index'. The call passes no arguments, so the cause
# presumably lies inside metbit - check the installed version / its docs.
pca_.plot_pca_scores()

ValueError: Value of 'hover_data_4' is not the name of a column in 'data_frame'. Expected one of ['PC1', 'PC2', 'Group', 'Index'] but received: Features

In [None]:
# Remove every row of class 2, leaving a two-class table for OPLS-DA.
# This mutates df in place, so cells above that displayed or sliced df no
# longer reflect its current contents.
df.drop(df.loc[df['target']==2].index, inplace=True)

In [None]:
# Re-derive features and labels from the filtered table: every column except
# the trailing 'target' is a feature; 'target' is the class id.
features = df.columns[:-1]
X = df.iloc[:, :-1]
y = df.loc[:, 'target']


In [None]:
# Build the metbit OPLS-DA object from the two-class feature table and labels.
# NOTE(review): argument semantics are defined by metbit - confirm there.
opls_ = opls_da(X=X, y=y, features_name=features.tolist())

In [None]:
# Fit step of the OPLS-DA object; presumably required before the VIP calls below - TODO confirm.
opls_.fit()

In [None]:
# VIP scores from the fitted object (presumably one score per feature - confirm in metbit).
opls_.vip_scores()

In [None]:
# Plot the VIP scores with the transform option on and a threshold of 0.5.
# NOTE(review): the meaning of vip_trans_form and the threshold scale are defined by metbit - confirm.
opls_.vip_plot(vip_trans_form=True, threshold=0.5)

In [None]:
from lingress import lin_regression

In [None]:
# Number of samples per class in the current label vector.
y.value_counts()

In [None]:
# Same lingress workflow as for the simulated spectra, now on the two-class
# iris table: build the object, prepare the dataset, fit, then draw the
# volcano plot with both cut-offs set to 1.
# NOTE(review): features is passed as a pandas Index here, whereas the metbit
# calls above pass features.tolist() - confirm which form lingress expects.
lin_ = lin_regression(x=X, target=y, label=y, features_name=features)
lin_.create_dataset()
lin_.fit_model()
lin_.volcano_plot(p_val_cut_off=1, fc_cut_off=1)