In [1]:
from sklearn.decomposition import TruncatedSVD
import numpy as np

# Sample data: a 4x5 matrix (4 rows, 5 columns) built from integer literals
X = np.array([
    [1, 2, 3, 4, 5],
    [4, 5, 6, 7, 8],
    [7, 8, 9, 10, 11],
    [10, 11, 12, 13, 14]
])

# Initialize TruncatedSVD with n_components = 2 to reduce dimensionality to 2.
# random_state is pinned to a constant; presumably so that re-running the cell
# reproduces the same numbers.
svd = TruncatedSVD(n_components=2, random_state=42)
# Fit the estimator on X and get back the reduced representation in one call.
X_reduced = svd.fit_transform(X)

# Report the shape before and after the reduction, then the reduced values.
print("Original data shape:", X.shape)
print("Reduced data shape:", X_reduced.shape)
print("Reduced data:\n", X_reduced)

Original data shape: (4, 5)
Reduced data shape: (4, 2)
Reduced data:
 [[ 7.11931431e+00 -2.07734536e+00]
 [ 1.37454287e+01 -1.03111042e+00]
 [ 2.03715432e+01  1.51245236e-02]
 [ 2.69976576e+01  1.06135946e+00]]


In [11]:
import pandas as pd
# Example long-format table: one row per (gene, specimen) pair with its 'tpm' value
df = pd.DataFrame({
    'versioned_ensembl_gene_id': ['gene1', 'gene2', 'gene3','gene3', 'gene1', 'gene2'],
    'specimen_id': ['a', "a", 'b', 'c', 'd', 'e'],
    'tpm': [10, 15, 20, 5, 8, 5]
})

# Pivot to wide format: one row per specimen, one column per gene.
# (specimen, gene) combinations that do not occur in `df` come out as NaN.
# pivot() already returns 'specimen_id' as the index, so no
# reset_index()/set_index() round-trip is needed afterwards; the column axis
# keeps the name 'versioned_ensembl_gene_id' (append .rename_axis(columns=None)
# if that label should be dropped).
df_transposed = df.pivot(index='specimen_id', columns='versioned_ensembl_gene_id', values='tpm')
df_transposed.head()

versioned_ensembl_gene_id,gene1,gene2,gene3
specimen_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,10.0,15.0,
b,,,20.0
c,,,5.0
d,8.0,,
e,,5.0,


In [9]:
import numpy as np
import pandas as pd

def quantile_normalize(df):
    """Quantile-normalize the columns of ``df`` onto one shared distribution.

    Every column is sorted independently, the sorted columns are averaged row
    by row to obtain one reference value per rank, and each original value is
    then replaced by the reference value belonging to its within-column rank.

    Tied values inside a column all receive the reference value of the lowest
    rank they share (``rank(method="min")``).

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric data, one column per sample. Must not contain missing values.

    Returns
    -------
    pandas.DataFrame
        Float frame with the same index and columns as ``df``.

    Raises
    ------
    ValueError
        If ``df`` contains NaN: ranks are undefined for missing values and the
        reference distribution would be contaminated by them.
    """
    if df.isna().to_numpy().any():
        raise ValueError(
            "quantile_normalize does not support missing values (NaN) in the input"
        )

    # Step 1: sort each column on its own; row k then holds the k-th smallest
    # value of every column.
    sorted_values = np.sort(df.to_numpy(), axis=0)

    # Step 2: average across columns to get the reference value for each rank.
    rank_means = sorted_values.mean(axis=1)

    # Step 3: 0-based rank of every original value within its column, used
    # directly as an index into the reference values (vectorized lookup).
    rank_positions = df.rank(method="min").to_numpy().astype(int) - 1

    return pd.DataFrame(
        rank_means[rank_positions],
        index=df.index,
        columns=df.columns,
    )

# Example usage: three columns (Sample1..Sample3) with five integer values each.
data = pd.DataFrame({
    'Sample1': [-5, 3, 4, 2, 20],
    'Sample2': [-10, 8, 6, 4, 2],
    'Sample3': [13, 6, 9, 12, 15]
})

# Run the quantile_normalize function defined earlier in this cell and print
# the resulting frame as plain text.
normalized_data = quantile_normalize(data)
print(normalized_data)

     Sample1    Sample2    Sample3
0  -3.000000  -3.000000   7.666667
1   6.333333  14.333333  -3.000000
2   7.666667   7.666667   4.333333
3   4.333333   6.333333   6.333333
4  14.333333   4.333333  14.333333


In [11]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Small two-column frame to standardize
df = pd.DataFrame({'A': [10, 2, 3], 'B': [4, 5, 6]})

# Scaler instance with default settings
scaler = StandardScaler()

# Learn the per-column statistics from df and apply them to the same data;
# fit() hands back the scaler itself, so the two steps can be chained.
df_scaled = scaler.fit(df).transform(df)

print(df_scaled)

[[ 1.40487872 -1.22474487]
 [-0.84292723  0.        ]
 [-0.56195149  1.22474487]]


In [18]:
import warnings

import numpy as np

# Example array with some NaN values: row 0 is entirely NaN, row 1 has none
tmp = np.array([2.0, 8.0])  # only used below as the source of the target dtype
all_nan = np.array([[np.nan,np.nan, np.nan], [1, 4, 3]])
# nanquantile reports an all-NaN row through the `warnings` module
# (RuntimeWarning "All-NaN slice encountered"), which np.errstate does not
# intercept, so a warnings filter is what actually silences it. errstate is
# kept to ignore floating-point "invalid" signals as before.
with np.errstate(invalid='ignore'), warnings.catch_warnings():
    warnings.filterwarnings('ignore', message='All-NaN slice', category=RuntimeWarning)
    nonzero_median = np.nanquantile(all_nan, q=0.5, axis=1).astype(tmp.dtype)
# Replace NaN values with 1
nonzero_median = np.where(np.isnan(nonzero_median), 1, nonzero_median)

print(nonzero_median)

[1. 3.]


In [2]:
import pandas as pd

# Sample data
data = {
    "year_of_birth": ["1986-01-01", "1990-05-10", "1982-07-15"],
    "date_of_boost": ["2016-09-12", "2017-08-20", "2019-03-22"],
    "actual_day_relative_to_boost": [-4, 10, 0]
}

# Build the frame in a single chain: parse both date columns, then derive the
# age. Later assign() keywords see the columns produced by earlier ones, so the
# age expression works on the already-parsed datetimes.
df = pd.DataFrame(data).assign(
    year_of_birth=lambda frame: pd.to_datetime(frame["year_of_birth"]),
    date_of_boost=lambda frame: pd.to_datetime(frame["date_of_boost"]),
    # Age in fractional years: shift the boost date by the relative day offset,
    # subtract the birth date, and divide the elapsed seconds by the number of
    # seconds in a 365.25-day year.
    age_at_adjusted_boost=lambda frame: (
        frame["date_of_boost"]
        + pd.to_timedelta(frame["actual_day_relative_to_boost"], unit="D")
        - frame["year_of_birth"]
    ).dt.total_seconds() / (365.25 * 24 * 60 * 60),
)

print(df)


  year_of_birth date_of_boost  actual_day_relative_to_boost  \
0    1986-01-01    2016-09-12                            -4   
1    1990-05-10    2017-08-20                            10   
2    1982-07-15    2019-03-22                             0   

   age_at_adjusted_boost  
0              30.685832  
1              27.307324  
2              36.684463  
