<a href="https://colab.research.google.com/github/housemLassoued/ML-deployment/blob/main/Classifying_Rainy_Days.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import pandas as pd
import plotly.express as px
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier as decisionTreeClassifier

In [26]:
df=pd.read_csv("/content/Rainfall.csv")

In [27]:
df.head()

Unnamed: 0,day,pressure,maxtemp,temparature,mintemp,dewpoint,humidity,cloud,rainfall,sunshine,winddirection,windspeed
0,1,1025.9,19.9,18.3,16.8,13.1,72,49,yes,9.3,80.0,26.3
1,2,1022.0,21.7,18.9,17.2,15.6,81,83,yes,0.6,50.0,15.3
2,3,1019.7,20.3,19.3,18.0,18.4,95,91,yes,0.0,40.0,14.2
3,4,1018.9,22.3,20.6,19.1,18.8,90,88,yes,1.0,50.0,16.9
4,5,1015.9,21.3,20.7,20.2,19.9,95,81,yes,0.0,40.0,13.7


In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 12 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   day                     366 non-null    int64  
 1   pressure                366 non-null    float64
 2   maxtemp                 366 non-null    float64
 3   temparature             366 non-null    float64
 4   mintemp                 366 non-null    float64
 5   dewpoint                366 non-null    float64
 6   humidity                366 non-null    int64  
 7   cloud                   366 non-null    int64  
 8   rainfall                366 non-null    object 
 9   sunshine                366 non-null    float64
 10           winddirection  365 non-null    float64
 11  windspeed               365 non-null    float64
dtypes: float64(8), int64(3), object(1)
memory usage: 34.4+ KB


In [29]:
df.dropna(inplace=True)

In [30]:
df.shape

(365, 12)

In [31]:
df['rainfall']=df['rainfall'].map({'yes':1,'no':0})

In [32]:

corr_matrix = df.corr()


fig = px.imshow(corr_matrix,
                text_auto=True,
                color_continuous_scale='Viridis',
                title='Heatmap_Corrélations')


fig.show()


In [33]:
df=df.drop(columns=['day','maxtemp','mintemp' ])

In [34]:

value_counts = df['rainfall'].value_counts().reset_index()
value_counts.columns = ['rainfall', 'count']


fig = px.pie(value_counts, names='rainfall', values='count',
             title='Distribution of Rainfall Categories')


fig.show()

In [35]:

df_majority = df[df['rainfall'] == 1]
df_minority = df[df['rainfall'] == 0]
df_minority_upsampled = resample(df_minority,
                                 replace=True,
                                 n_samples=len(df_majority),
                                 random_state=42)

df = pd.concat([df_majority, df_minority_upsampled])


In [36]:
x=df.drop(columns=['rainfall'])
y=df['rainfall']

In [37]:


x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)


In [38]:


model = RandomForestClassifier()
model.fit(x_train, y_train)

In [39]:


y_pred = model.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm, index=[f"Class {i}" for i in range(cm.shape[0])],
                     columns=[f"Class {i}" for i in range(cm.shape[1])])


fig = px.imshow(cm_df, text_auto=True, color_continuous_scale='Blues',
                labels={'x': 'Prédictions', 'y': 'Véritables Classes'},
                title='Matrice de Confusion')


fig.show()


print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.84      0.92      0.88        50
           1       0.91      0.82      0.86        50

    accuracy                           0.87       100
   macro avg       0.87      0.87      0.87       100
weighted avg       0.87      0.87      0.87       100



In [40]:

svc=SVC()
svc.fit(x_train,y_train)

In [41]:
y_pred = svc.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm, index=[f"Class {i}" for i in range(cm.shape[0])],
                     columns=[f"Class {i}" for i in range(cm.shape[1])])

fig = px.imshow(cm_df, text_auto=True, color_continuous_scale='Blues',
                labels={'x': 'Prédictions', 'y': 'Véritables Classes'},
                title='Matrice de Confusion')


fig.show()

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.71      0.58      0.64        50
           1       0.64      0.76      0.70        50

    accuracy                           0.67       100
   macro avg       0.68      0.67      0.67       100
weighted avg       0.68      0.67      0.67       100



In [42]:

xgboost=XGBClassifier()
xgboost.fit(x_train,y_train)

In [43]:
y_pred = xgboost.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm, index=[f"Class {i}" for i in range(cm.shape[0])],
                     columns=[f"Class {i}" for i in range(cm.shape[1])])

fig = px.imshow(cm_df, text_auto=True, color_continuous_scale='Blues',
                labels={'x': 'Prédictions', 'y': 'Véritables Classes'},
                title='Matrice de Confusion')

fig.show()

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.83      0.88      0.85        50
           1       0.87      0.82      0.85        50

    accuracy                           0.85       100
   macro avg       0.85      0.85      0.85       100
weighted avg       0.85      0.85      0.85       100



In [44]:

dT= decisionTreeClassifier()
dT.fit(x_train,y_train)

In [45]:
y_pred = dT.predict(x_test)
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm, index=[f"Class {i}" for i in range(cm.shape[0])],
                     columns=[f"Class {i}" for i in range(cm.shape[1])])


fig = px.imshow(cm_df, text_auto=True, color_continuous_scale='Blues',
                labels={'x': 'Prédictions', 'y': 'Véritables Classes'},
                title='Matrice de Confusion')


fig.show()

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.82      0.92      0.87        50
           1       0.91      0.80      0.85        50

    accuracy                           0.86       100
   macro avg       0.87      0.86      0.86       100
weighted avg       0.87      0.86      0.86       100

