# Import libraries

### Pandas
### Numpy
### Seaborn
### Matplotlib

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

### Read the CSV File 

In [None]:
# Load the vaccine-efficacy dataset into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path — adjust it when
# running the notebook elsewhere.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\zabiz\Downloads\ML_Models\Regression_Models\Theil-Sen Regression/covid_vaccine_efficacy_dataset.csv"
)

### Explore the Data

In [None]:
# Preview the first five rows of the raw data.
df.head(n=5)

### Remove Irrelevant Columns

In [None]:
# Discard the "Month" column; `columns=` already names the axis, so no
# separate `axis` argument is needed.
df = df.drop(columns="Month")

### Check whether the column was removed successfully (it was)

In [None]:
# Preview the first five rows to confirm "Month" is gone.
df.head(n=5)

### Import LabelEncoder from sklearn.preprocessing to convert categorical data into numerical values

In [None]:
from sklearn.preprocessing import LabelEncoder

### Apply Label Encoder

In [None]:
# Learn an integer code for every distinct country, then overwrite the
# text column with those codes.
# NOTE(review): integer codes impose an arbitrary order on a nominal
# feature — confirm this is acceptable for a linear model.
le = LabelEncoder()
le.fit(df["Country"])
df["Country"] = le.transform(df["Country"])

### Check whether the Country column was converted to numerical values (it was)

In [None]:
# Preview the first five rows to confirm "Country" now holds integer codes.
df.head(n=5)

### Check the Null Values

In [None]:
# Count the missing values in each column.
df.isna().sum()

### Check Information about the data

In [None]:
# Print a concise summary of the frame: column dtypes, non-null counts and memory usage.
df.info()

### Check the Statistical Summary of the data

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

### Check for outliers in the data with a boxplot

In [None]:
# Draw one horizontal box per column on an explicit 15x8-inch figure.
fig, ax = plt.subplots(figsize=(15, 8))
sns.boxplot(data=df, orient="h", ax=ax)
ax.set_title("Boxplot")
plt.show()


### Check the relationship between the columns through pairplot 

# pairplot returns a PairGrid that owns a whole grid of axes. plt.title()
# would only label the current (last-drawn) subplot, so the title is set on
# the figure itself instead.
pair_grid = sns.pairplot(data=df)
# y slightly above 1 keeps the title clear of the top row of subplots.
pair_grid.figure.suptitle("PairPlot", y=1.02)
plt.show()

### Check the correlation between the columns through a heatmap

In [None]:
# Pairwise correlation of all columns, rendered as an annotated heatmap.
corr_matrix = df.corr()
heatmap_ax = sns.heatmap(corr_matrix, annot=True, cmap="coolwarm")
heatmap_ax.set_title("Heatmap")
plt.show()

### Split the data into x and y (Independent & Dependent Variables)

In [None]:
# Features: every column except the target. Dropping the target by name
# (instead of slicing off the last column by position) guarantees it cannot
# leak into x, whatever the column order of the CSV.
x = df.drop(columns=["Case_Reduction_Rate"])
# Target: kept as a one-column DataFrame, as the later cells expect.
y = df[["Case_Reduction_Rate"]]

### Import train_test_split from sklearn.model_selection for splitting the data into training and testing sets

In [None]:
from sklearn.model_selection import train_test_split

### Split the data into training and testing sets

In [None]:
# Hold out 40% of the rows for testing; the fixed seed keeps the split
# identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.4,
    random_state=42,
)

### Import the machine learning model TheilSenRegressor from sklearn.linear_model

In [None]:
from sklearn.linear_model import TheilSenRegressor

### Apply/Fit the model

In [None]:
# Theil-Sen may fit on randomly drawn subsets of the samples; a fixed seed
# makes the fitted coefficients (and the reported scores) reproducible.
tsr = TheilSenRegressor(random_state=42)
# y_train is a one-column DataFrame, but fit() expects a 1-D target.
# Flattening it avoids scikit-learn's DataConversionWarning.
tsr.fit(x_train, np.ravel(y_train))

### Check the model's R² score on the test and training sets to look for **overfitting** (our model is not **overfit**)

In [None]:
# R² as a percentage, shown as a (test, train) pair; a large gap between
# the two would point to overfitting.
(
    tsr.score(x_test, y_test) * 100,
    tsr.score(x_train, y_train) * 100,
)

In [None]:
# Debug aid: uncomment to inspect the sizes of the train/test splits.
# print(x_train.shape, x_test.shape)

### Import metrics from sklearn to check the overall performance of the model

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

### Check the Performance

In [None]:
# Predict on the held-out rows and score the predictions against the truth.
y_pred = tsr.predict(x_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE

# One line per metric, rounded to two decimals; R² is shown as a percentage.
print(
    f"📉 MAE: {mae:.2f}",
    f"📉 MSE: {mse:.2f}",
    f"📉 RMSE: {rmse:.2f}",
    f"🎯 R² Score: {r2 * 100:.2f}",
    sep="\n",
)


### Check the Performance of the model through Data Visualization 

In [None]:
# Make sure y_test is 1D (in case it's a DataFrame)
y_test_flat = y_test.values.ravel()
y_pred_flat = y_pred.ravel()
plt.figure(figsize=(10, 6))
sns.scatterplot(x=y_test_flat, y=y_pred_flat, color='skyblue', label='Predictions')
plt.plot([y_test_flat.min(), y_test_flat.max()], [y_test_flat.min(), y_test_flat.max()], color='red', linestyle='--', label='Ideal Prediction')
plt.xlabel("Case_Reduction_Rate")
plt.ylabel("Predicted Case_Reduction_Rate")
plt.title("Actual vs Predicted Case_Reduction_Rate")
plt.legend()
plt.grid(True)
plt.show()


## 📌 Conclusion

In this project, we implemented a **Theil-Sen Regressor**, a robust regression technique well-suited for handling outliers and noisy datasets.  
The workflow included:
- Data loading and preprocessing  
- Splitting data into training and testing sets  
- Model training using Theil-Sen Regression  
- Evaluation using MAE, MSE, RMSE, and R² Score  

**Model Performance:**
- 📉 MAE: 1.94  
- 📉 MSE: 7.88  
- 📉 RMSE: 2.81  
- 🎯 R² Score: 99.11% (train) and 99.15% (test)  

These results demonstrate **exceptional predictive accuracy** with minimal overfitting, indicating that the model generalizes extremely well to unseen data.  
The Theil-Sen Regressor proved to be a powerful choice for this dataset, maintaining high stability even in the presence of noise or potential outliers.  
Future improvements could include cross-validation, hyperparameter tuning, and comparison with other robust regression methods to further validate performance.
