<a href="https://colab.research.google.com/github/aryanandre2024-cyber/SY-BIOTECH-PY/blob/main/Aryan%20updated%20py%20project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Drug Response Prediction: Predicting IC50 values using gene expression features
# This is a simple Python script for beginners. It uses synthetic data to mimic a GDSC/CellMiner-style dataset.
# IC50 prediction is treated as a regression task (predicting continuous values).

# Step 1: Import necessary libraries
import pandas as pd  # For data handling
import numpy as np  # For numerical operations
from sklearn.model_selection import train_test_split  # To split data
from sklearn.linear_model import LinearRegression  # Simple regression model
from sklearn.metrics import mean_squared_error  # To evaluate predictions

# Step 2: Build a synthetic dataset (stand-in for gene expression features + IC50 values)
# With real data you would load a file instead, e.g.: data = pd.read_csv('your_dataset.csv')
# Layout: 100 rows (samples), 10 gene-expression columns, plus one IC50 target column.
np.random.seed(42)  # Fix the global NumPy RNG so every run draws the same numbers
n_samples = 100
n_features = 10  # Number of gene expression columns

# Standard-normal draws, one row per sample and one column per gene
gene_features = np.random.standard_normal((n_samples, n_features))
# IC50 target: standard-normal noise scaled by 10 and shifted to centre on 50.
# NOTE: these values are drawn independently of gene_features, so the features
# carry no real signal about the target in this toy dataset.
ic50_values = 50 + 10 * np.random.standard_normal(n_samples)

# Assemble everything into one DataFrame: Gene_1 ... Gene_10 followed by IC50
gene_names = [f'Gene_{idx}' for idx in range(1, n_features + 1)]
data = pd.DataFrame(gene_features, columns=gene_names).assign(IC50=ic50_values)

# Step 3: Separate the target from the features, then hold out a test set
y = data['IC50']  # Target column (IC50 values)
X = data.drop(columns='IC50')  # Everything else: the gene expression features

# 80% of rows go to training, 20% to testing; random_state pins the shuffle
# so the same rows land in each split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 4: Fit a Linear Regression model on the training split
# (fit() returns the estimator itself, so construction and fitting are chained)
model = LinearRegression().fit(X_train, y_train)

# Step 5: Predict IC50 for the held-out rows and score the predictions
y_pred = model.predict(X_test)
# Mean Squared Error: average squared gap between actual and predicted values (lower is better)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# Step 6: Display results
print("Model trained successfully!")
print(f"Mean Squared Error on test data: {mse:.2f}")
print("Sample predictions:")
# Show up to the first 5 actual/predicted pairs. Zipping two slices (rather than
# indexing with range(5)) avoids an IndexError if the test split ever has fewer
# than 5 rows, e.g. after lowering n_samples.
for actual, predicted in zip(y_test.iloc[:5], y_pred[:5]):
    print(f"Actual IC50: {actual:.2f}, Predicted IC50: {predicted:.2f}")

# Real-life note: In precision medicine, a lower IC50 means less drug is needed to
# inhibit the cell line by 50%, i.e. the drug is more effective for that cell line.
# To expand: Try other models like RandomForestRegressor from sklearn.ensemble.

Model trained successfully!
Mean Squared Error on test data: 118.05
Sample predictions:
Actual IC50: 70.77, Predicted IC50: 46.99
Actual IC50: 52.61, Predicted IC50: 53.86
Actual IC50: 49.53, Predicted IC50: 56.40
Actual IC50: 58.84, Predicted IC50: 51.13
Actual IC50: 48.27, Predicted IC50: 50.67
