<a href="https://colab.research.google.com/github/abinashkng/Pharma-Batch-Yield-Prediction/blob/main/Pharma_Batch_Yield_Prediction_(Regression).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. IMPORTS
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error, r2_score
import joblib

In [None]:
# 2. LOAD DATA
# Read the batch records from a CSV expected in the current working directory.
df = pd.read_csv("pharma_batch_yield_data.csv")

# Quick sanity check: first rows, then per-column summary statistics.
head_rows = df.head()
print("Head:\n", head_rows)
summary_stats = df.describe()
print("\nSummary:\n", summary_stats)

In [None]:
# 3. BASIC EDA (Yield Distribution over Batch)
# Histogram of the target column, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(df["batch_yield"], bins=20)
ax.set_title("Yield Distribution")
ax.set_xlabel("Batch Yield")
ax.set_ylabel("Count")
plt.show()

In [None]:
# Scatter plot of the temperature column against the batch yield.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(df["temperature"], df["batch_yield"])
ax.set(title="Temperature vs Yield", xlabel="Temperature", ylabel="Yield")
plt.show()

In [None]:
# Scatter plot of the humidity column against the batch yield.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(df["humidity"], df["batch_yield"])
ax.set(title="Humidity vs Yield", xlabel="Humidity", ylabel="Yield")
plt.show()

In [None]:
# Scatter plot of the mixing-time column against the batch yield.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(df["mixing_time"], df["batch_yield"])
ax.set(title="Mixing Time vs Yield", xlabel="Mixing Time", ylabel="Yield")
plt.show()

In [None]:
# Scatter plot of the ingredient-ratio column against the batch yield.
fig, ax = plt.subplots(figsize=(6, 4))
ax.scatter(df["ingredient_ratio"], df["batch_yield"])
ax.set(title="Ingredient Ratio vs Yield", xlabel="Ingredient Ratio", ylabel="Yield")
plt.show()

In [None]:
# 4. TRAIN MODEL
# Feature matrix and target.
# Every column except the target is used as a predictor; dropping the target
# by name keeps X a DataFrame, so its column labels stay available later.
# NOTE(review): assumes all remaining columns are numeric predictors — confirm
# against the CSV schema (e.g. no batch-ID or free-text columns).
X = df.drop(columns=["batch_yield"])
y = df["batch_yield"]

# Train-test split: hold out 20% of the rows for evaluation. The fixed
# random_state makes the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=42)

# Model: linear regression fitted on the training rows only.
model = LinearRegression()
model.fit(X_train, y_train)

In [None]:
# 5. EVALUATE MODEL
# Predict on the held-out rows and compare against the true yields.
y_pred = model.predict(X_test)

# RMSE is in the same units as the target; R² is the coefficient of
# determination reported by scikit-learn's r2_score.
rmse = root_mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nRMSE:", rmse)
# The label previously rendered as mojibake ("RÂ²"); it is meant to read "R²".
print("R²:", r2)

In [None]:
# 6. FEATURE IMPORTANCE (Coefficients)
# Pair each fitted coefficient with the column label it belongs to.
importance = model.coef_
features = X.columns

# Bar chart of the coefficients, one bar per feature.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(features, importance)
ax.set_title("Feature Importance (Linear Regression Coefficients)")
ax.set_ylabel("Coefficient Value")
plt.xticks(rotation=45)  # tilt the feature names on the x axis
plt.show()

# Same numbers as text, one "feature: coefficient" line each.
print("\nFeature Importance:")
for feature_name, coefficient in zip(features, importance):
    print(f"{feature_name}: {coefficient}")

In [None]:
# 7. SAVE MODEL
# Serialize the fitted estimator with joblib into the working directory.
joblib.dump(value=model, filename="batch_yield_model.pkl")
print("\nModel saved as batch_yield_model.pkl")