# Rain Prediction

### libraries

In [282]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

### dataset

In [283]:
dataset = pd.read_csv(r'weatherAUS.csv')

### Defining the X and y 

In [284]:
X = dataset.iloc[:, [1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]].values
y = dataset.iloc[:, -1].values
X

array([['Albury', 13.4, 22.9, ..., 16.9, 21.8, 'No'],
       ['Albury', 7.4, 25.1, ..., 17.2, 24.3, 'No'],
       ['Albury', 12.9, 25.7, ..., 21.0, 23.2, 'No'],
       ...,
       ['Uluru', 5.4, 26.9, ..., 12.5, 26.1, 'No'],
       ['Uluru', 7.8, 27.0, ..., 15.1, 26.0, 'No'],
       ['Uluru', 14.9, nan, ..., 15.0, 20.9, 'No']], dtype=object)

In [285]:
y

array(['No', 'No', 'No', ..., 'No', 'No', nan], dtype=object)

### changing y into 2D

In [286]:
y = y.reshape(-1,1)
y

array([['No'],
       ['No'],
       ['No'],
       ...,
       ['No'],
       ['No'],
       [nan]], dtype=object)

### cleaning the dataset

In [287]:
imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')


### fitting the datas


In [288]:
X = imputer.fit_transform(X)
X

array([['Albury', 13.4, 22.9, ..., 16.9, 21.8, 'No'],
       ['Albury', 7.4, 25.1, ..., 17.2, 24.3, 'No'],
       ['Albury', 12.9, 25.7, ..., 21.0, 23.2, 'No'],
       ...,
       ['Uluru', 5.4, 26.9, ..., 12.5, 26.1, 'No'],
       ['Uluru', 7.8, 27.0, ..., 15.1, 26.0, 'No'],
       ['Uluru', 14.9, 20.0, ..., 15.0, 20.9, 'No']], dtype=object)

In [289]:
y

array([['No'],
       ['No'],
       ['No'],
       ...,
       ['No'],
       ['No'],
       [nan]], dtype=object)

### converting all the datas into numerical form

In [290]:
X = imputer.fit_transform(X)
le1 = LabelEncoder()
X[:, 0] = le1.fit_transform(X[:, 0])

le2 = LabelEncoder()
X[:, 4] = le2.fit_transform(X[:, 4])

# Handle the problematic column with string value 'W'
column_with_w = 6  # Replace with the actual index of the column
le3 = LabelEncoder()
X[:, column_with_w] = le3.fit_transform(X[:, column_with_w])

le4 = LabelEncoder()
X[:, 7] = le4.fit_transform(X[:, 7])

le5 = LabelEncoder()
X[:, -1] = le5.fit_transform(X[:, -1])

le6 = LabelEncoder()
y = le6.fit_transform(y)

  y = column_or_1d(y, warn=True)


### features scaling

In [291]:
ss = StandardScaler()
X = ss.fit_transform(X)

In [292]:
X

array([[-1.53166617,  0.19132753, -0.04135977, ..., -0.01407077,
         0.02310362, -0.52979545],
       [-1.53166617, -0.75105231,  0.26874452, ...,  0.03244663,
         0.387799  , -0.52979545],
       [-1.53166617,  0.11279588,  0.35331842, ...,  0.62166712,
         0.22733303, -0.52979545],
       ...,
       [ 1.20928479, -1.06517892,  0.52246622, ..., -0.69632607,
         0.65037966, -0.52979545],
       [ 1.20928479, -0.68822699,  0.53656187, ..., -0.29317521,
         0.63579185, -0.52979545],
       [ 1.20928479,  0.42692249, -0.45013361, ..., -0.30868102,
        -0.10818671, -0.52979545]])

### spliiting the datas


In [293]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=0)

In [294]:
X_train

array([[ 0.22535368,  1.03946939,  0.07140543, ...,  0.68369032,
         0.08145488, -0.52979545],
       [ 1.42012717, -0.45263203,  0.11369237, ..., -0.41722163,
         0.22733303, -0.52979545],
       [ 0.50647685, -0.20133073, -0.14002932, ..., -0.06058818,
        -0.02065982,  1.88752093],
       ...,
       [ 1.0687232 ,  0.75675544,  0.93124006, ...,  1.10234698,
         1.07342629, -0.52979545],
       [ 0.57675765, -0.04426743, -0.16822062, ...,  0.01694083,
        -0.28324049,  1.88752093],
       [ 1.63096955, -0.0285611 , -0.91529006, ..., -0.35519842,
        -0.76463838, -0.52979545]])

In [295]:
y_train

array([1, 0, 0, ..., 0, 0, 0])

### trainig the model

In [296]:
clf = RandomForestClassifier(n_estimators=100,random_state=0)
clf.fit(X_train,y_train)

In [297]:
clf.score(X_train,y_train)

0.999664856317888

In [298]:
y_pred = clf.predict(X_test)
y_pred

array([0, 1, 0, ..., 0, 0, 0])

### reversing the transformation

In [299]:
y_pred = le6.inverse_transform(y_pred)

In [300]:
y_pred

array(['No', 'Yes', 'No', ..., 'No', 'No', 'No'], dtype=object)

In [301]:
print(y_test)

[1 1 0 ... 1 0 0]


In [302]:
y_test = le6.inverse_transform(y_test)
y_test

array(['Yes', 'Yes', 'No', ..., 'Yes', 'No', 'No'], dtype=object)

In [303]:
y_test = y_test.reshape(-1,1)
y_pred = y_pred.reshape(-1,1)

### defining the dataframe

In [304]:
df =  np.concatenate((y_test,y_pred),axis=1)
df

array([['Yes', 'No'],
       ['Yes', 'Yes'],
       ['No', 'No'],
       ...,
       ['Yes', 'No'],
       ['No', 'No'],
       ['No', 'No']], dtype=object)

In [305]:
dataframe = pd.DataFrame(df,columns=['Rain on Tomorrow','Predictin of Rain'])
dataframe

Unnamed: 0,Rain on Tomorrow,Predictin of Rain
0,Yes,No
1,Yes,Yes
2,No,No
3,No,Yes
4,No,No
...,...,...
29087,No,Yes
29088,No,No
29089,Yes,No
29090,No,No


### accuracy of the model

In [306]:
y_test = y_test.astype(str) 
y_pred = y_pred.astype(str)  

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8410903341124708


In [307]:
dataframe.to_csv('prediction.csv')