In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Load your dataset
file_path = 'weatherAUS.csv.csv'  # Update with your actual file path
df = pd.read_csv(file_path)

# Step 2: Replace null values with the average of each column
df.fillna(df.mean(), inplace=True)

# Step 3: Define features (X) and target variable (y)
X = df.drop('RainTomorrow', axis=1)  # Assuming 'RainTomorrow' is the target variable
y = df['RainTomorrow']

# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Initialize the model (Random Forest Classifier)
model = RandomForestClassifier(random_state=42)

# Step 6: Train the model
model.fit(X_train, y_train)

# Step 7: Predict on the test set
y_pred = model.predict(X_test)

# Step 8: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Additional evaluation metrics
print(classification_report(y_test, y_pred))

# Step 9: Optionally, deploy the model or use it for predictions on new data
# For deployment, you would typically serialize the model using joblib or pickle



In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [3]:

# Step 1: Load your dataset
file_path = 'weatherAUS.csv'  # Update with your actual file path
df = pd.read_csv(file_path)

In [4]:
df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,0.0,No
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,25.0,1010.6,1007.8,,,17.2,24.3,No,0.0,No
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,0.0,No
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,16.0,1017.6,1012.8,,,18.1,26.5,No,1.0,No
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,0.2,No


In [8]:
df.isnull().sum()

Date                 0
Location             0
MinTemp            637
MaxTemp            322
Rainfall          1406
Evaporation      60843
Sunshine         67816
WindGustDir       9330
WindGustSpeed     9270
WindDir9am       10013
WindDir3pm        3778
WindSpeed9am      1348
WindSpeed3pm      2630
Humidity9am       1774
Humidity3pm       3610
Pressure9am      14014
Pressure3pm      13981
Cloud9am         53657
Cloud3pm         57094
Temp9am            904
Temp3pm           2726
RainToday         1406
RISK_MM              0
RainTomorrow         0
dtype: int64

In [10]:
df.count()

Date             142193
Location         142193
MinTemp          141556
MaxTemp          141871
Rainfall         140787
Evaporation       81350
Sunshine          74377
WindGustDir      132863
WindGustSpeed    132923
WindDir9am       132180
WindDir3pm       138415
WindSpeed9am     140845
WindSpeed3pm     139563
Humidity9am      140419
Humidity3pm      138583
Pressure9am      128179
Pressure3pm      128212
Cloud9am          88536
Cloud3pm          85099
Temp9am          141289
Temp3pm          139467
RainToday        140787
RISK_MM          142193
RainTomorrow     142193
dtype: int64

In [23]:
df.columns

Index(['Date', 'Location', 'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation',
       'Sunshine', 'WindGustDir', 'WindGustSpeed', 'WindDir9am', 'WindDir3pm',
       'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
       'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am',
       'Temp3pm', 'RainToday', 'RISK_MM', 'RainTomorrow'],
      dtype='object')

In [25]:
df.dtypes

Date              object
Location          object
MinTemp          float64
MaxTemp          float64
Rainfall         float64
Evaporation      float64
Sunshine         float64
WindGustDir       object
WindGustSpeed    float64
WindDir9am        object
WindDir3pm        object
WindSpeed9am     float64
WindSpeed3pm     float64
Humidity9am      float64
Humidity3pm      float64
Pressure9am      float64
Pressure3pm      float64
Cloud9am         float64
Cloud3pm         float64
Temp9am          float64
Temp3pm          float64
RainToday         object
RISK_MM          float64
RainTomorrow      object
dtype: object

In [32]:
df1 = df.dropna(subset=['Evaporation'])


In [34]:
df1.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
5939,2009-01-01,Cobar,17.9,35.2,0.0,12.0,12.3,SSW,48.0,ENE,...,13.0,1006.3,1004.4,2.0,5.0,26.6,33.4,No,0.0,No
5940,2009-01-02,Cobar,18.4,28.9,0.0,14.8,13.0,S,37.0,SSE,...,8.0,1012.9,1012.1,1.0,1.0,20.3,27.0,No,0.0,No
5941,2009-01-03,Cobar,15.5,34.1,0.0,12.6,13.3,SE,30.0,,...,7.0,,1011.6,,1.0,,32.7,No,0.0,No
5942,2009-01-04,Cobar,19.4,37.6,0.0,10.8,10.6,NNE,46.0,NNE,...,22.0,1012.3,1009.2,1.0,6.0,28.7,34.9,No,0.0,No
5943,2009-01-05,Cobar,21.9,38.4,0.0,11.4,12.2,WNW,31.0,WNW,...,22.0,1012.7,1009.1,1.0,5.0,29.1,35.6,No,0.0,No


In [35]:
df1.isnull().sum()

Date                 0
Location             0
MinTemp            108
MaxTemp             66
Rainfall           257
Evaporation          0
Sunshine         10932
WindGustDir       4885
WindGustSpeed     4860
WindDir9am        3524
WindDir3pm        1200
WindSpeed9am       427
WindSpeed3pm       839
Humidity9am        527
Humidity3pm       1614
Pressure9am        381
Pressure3pm        382
Cloud9am          9583
Cloud3pm         12213
Temp9am            148
Temp3pm           1190
RainToday          257
RISK_MM              0
RainTomorrow         0
dtype: int64

In [36]:
df1 = df.dropna(subset=['Sunshine'])


In [37]:
df1.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
5939,2009-01-01,Cobar,17.9,35.2,0.0,12.0,12.3,SSW,48.0,ENE,...,13.0,1006.3,1004.4,2.0,5.0,26.6,33.4,No,0.0,No
5940,2009-01-02,Cobar,18.4,28.9,0.0,14.8,13.0,S,37.0,SSE,...,8.0,1012.9,1012.1,1.0,1.0,20.3,27.0,No,0.0,No
5941,2009-01-03,Cobar,15.5,34.1,0.0,12.6,13.3,SE,30.0,,...,7.0,,1011.6,,1.0,,32.7,No,0.0,No
5942,2009-01-04,Cobar,19.4,37.6,0.0,10.8,10.6,NNE,46.0,NNE,...,22.0,1012.3,1009.2,1.0,6.0,28.7,34.9,No,0.0,No
5943,2009-01-05,Cobar,21.9,38.4,0.0,11.4,12.2,WNW,31.0,WNW,...,22.0,1012.7,1009.1,1.0,5.0,29.1,35.6,No,0.0,No


In [38]:
df1.isnull().sum()

Date                0
Location            0
MinTemp            72
MaxTemp            30
Rainfall          218
Evaporation         0
Sunshine            0
WindGustDir      4226
WindGustSpeed    4203
WindDir9am       2530
WindDir3pm        645
WindSpeed9am      149
WindSpeed3pm      361
Humidity9am       368
Humidity3pm       542
Pressure9am       109
Pressure3pm       114
Cloud9am         6314
Cloud3pm         7828
Temp9am           108
Temp3pm           300
RainToday         218
RISK_MM             0
RainTomorrow        0
dtype: int64

In [39]:
df1.columns

Index(['Date', 'Location', 'MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation',
       'Sunshine', 'WindGustDir', 'WindGustSpeed', 'WindDir9am', 'WindDir3pm',
       'WindSpeed9am', 'WindSpeed3pm', 'Humidity9am', 'Humidity3pm',
       'Pressure9am', 'Pressure3pm', 'Cloud9am', 'Cloud3pm', 'Temp9am',
       'Temp3pm', 'RainToday', 'RISK_MM', 'RainTomorrow'],
      dtype='object')

In [41]:
import pandas as pd
from sklearn.impute import SimpleImputer


# Columns to impute with mean
columns_to_impute_mean = ['MinTemp', 'MaxTemp', 'WindGustSpeed', 'WindSpeed9am', 'WindSpeed3pm',
                          'Humidity9am', 'Humidity3pm', 'Pressure9am', 'Pressure3pm',
                          'Temp9am', 'Temp3pm','Cloud9am', 'Cloud3pm']

# Columns to impute with mode
columns_to_impute_mode = ['Rainfall', 'WindGustDir', 'WindDir9am', 'WindDir3pm', 'RainToday']

# Mean imputation
imputer_mean = SimpleImputer(strategy='mean')
df1[columns_to_impute_mean] = imputer_mean.fit_transform(df1[columns_to_impute_mean])

# Mode imputation
imputer_mode = SimpleImputer(strategy='most_frequent')
df1[columns_to_impute_mode] = imputer_mode.fit_transform(df1[columns_to_impute_mode])

# Verify if there are any remaining null values
null_counts_after = df1.isnull().sum()
print("Null value counts after imputation:")
print(null_counts_after)

# Now you can proceed with your analysis or modeling


Null value counts after imputation:
Date             0
Location         0
MinTemp          0
MaxTemp          0
Rainfall         0
Evaporation      0
Sunshine         0
WindGustDir      0
WindGustSpeed    0
WindDir9am       0
WindDir3pm       0
WindSpeed9am     0
WindSpeed3pm     0
Humidity9am      0
Humidity3pm      0
Pressure9am      0
Pressure3pm      0
Cloud9am         0
Cloud3pm         0
Temp9am          0
Temp3pm          0
RainToday        0
RISK_MM          0
RainTomorrow     0
dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[columns_to_impute_mean] = imputer_mean.fit_transform(df1[columns_to_impute_mean])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1[columns_to_impute_mode] = imputer_mode.fit_transform(df1[columns_to_impute_mode])


In [44]:
df1.shape

(70418, 24)

In [50]:
df1.drop(['Date'],axis = 1)

Unnamed: 0,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
5939,Cobar,17.9,35.2,0.0,12.0,12.3,SSW,48.0,ENE,SW,...,13.0,1006.30000,1004.4,2.000000,5.0,26.600000,33.4,No,0.0,No
5940,Cobar,18.4,28.9,0.0,14.8,13.0,S,37.0,SSE,SSE,...,8.0,1012.90000,1012.1,1.000000,1.0,20.300000,27.0,No,0.0,No
5941,Cobar,15.5,34.1,0.0,12.6,13.3,SE,30.0,N,N,...,7.0,1017.43818,1011.6,4.274694,1.0,17.608643,32.7,No,0.0,No
5942,Cobar,19.4,37.6,0.0,10.8,10.6,NNE,46.0,NNE,NNW,...,22.0,1012.30000,1009.2,1.000000,6.0,28.700000,34.9,No,0.0,No
5943,Cobar,21.9,38.4,0.0,11.4,12.2,WNW,31.0,WNW,WSW,...,22.0,1012.70000,1009.1,1.000000,5.0,29.100000,35.6,No,0.0,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139108,Darwin,19.3,33.4,0.0,6.0,11.0,ENE,35.0,SE,NE,...,32.0,1013.90000,1010.5,0.000000,1.0,24.500000,32.3,No,0.0,No
139109,Darwin,21.2,32.6,0.0,7.6,8.6,E,37.0,SE,SE,...,28.0,1014.60000,1011.2,7.000000,0.0,24.800000,32.0,No,0.0,No
139110,Darwin,20.7,32.8,0.0,5.6,11.0,E,33.0,E,W,...,23.0,1015.30000,1011.8,0.000000,0.0,24.800000,32.1,No,0.0,No
139111,Darwin,19.5,31.8,0.0,6.2,10.6,ESE,26.0,SE,NNW,...,58.0,1014.90000,1010.7,1.000000,1.0,24.800000,29.2,No,0.0,No


In [85]:

# Step 3: Define features (X) and target variable (y)
X = df1.drop('RainTomorrow', axis=1)  # Assuming 'RainTomorrow' is the target variable
y = df1['RainTomorrow']


In [65]:
X= X.drop(['Date'],axis =1)

In [86]:

# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#


In [87]:
 
model = RandomForestClassifier(random_state=42)


In [88]:
df1.dtypes

Date             datetime64[ns]
Location                  int64
MinTemp                 float64
MaxTemp                 float64
Rainfall                float64
Evaporation             float64
Sunshine                float64
WindGustDir               int64
WindGustSpeed           float64
WindDir9am                int64
WindDir3pm                int64
WindSpeed9am            float64
WindSpeed3pm            float64
Humidity9am             float64
Humidity3pm             float64
Pressure9am             float64
Pressure3pm             float64
Cloud9am                float64
Cloud3pm                float64
Temp9am                 float64
Temp3pm                 float64
RainToday                 int64
RISK_MM                 float64
RainTomorrow              int64
Year                      int64
Month                     int64
Day                       int64
dtype: object

In [63]:


# Step 1: Convert the 'Date' column to datetime and extract useful features
df1['Date'] = pd.to_datetime(df1['Date'])
df1['Year'] = df1['Date'].dt.year
df1['Month'] = df1['Date'].dt.month
df1['Day'] = df1['Date'].dt.day

# Step 2: Convert categorical columns using label encoding
from sklearn.preprocessing import LabelEncoder

label_encoders = {}
categorical_columns = ['Location', 'WindGustDir', 'WindDir9am', 'WindDir3pm', 'RainToday', 'RainTomorrow']

for col in categorical_columns:
    label_encoders[col] = LabelEncoder()
    df1[col] = label_encoders[col].fit_transform(df1[col])

# Step 3: Convert 'Rainfall' to numeric
df1['Rainfall'] = pd.to_numeric(df1['Rainfall'], errors='coerce')

# Step 4: Handle any remaining null values
df1.fillna(df1.mean(), inplace=True)  # Using mean imputation for simplicity

# Verify the conversion
print(df1.dtypes)

# Now df has all numerical or datetime columns


Date             datetime64[ns]
Location                  int64
MinTemp                 float64
MaxTemp                 float64
Rainfall                float64
Evaporation             float64
Sunshine                float64
WindGustDir               int64
WindGustSpeed           float64
WindDir9am                int64
WindDir3pm                int64
WindSpeed9am            float64
WindSpeed3pm            float64
Humidity9am             float64
Humidity3pm             float64
Pressure9am             float64
Pressure3pm             float64
Cloud9am                float64
Cloud3pm                float64
Temp9am                 float64
Temp3pm                 float64
RainToday                 int64
RISK_MM                 float64
RainTomorrow              int64
Year                      int64
Month                     int64
Day                       int64
dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['Date'] = pd.to_datetime(df1['Date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['Year'] = df1['Date'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['Month'] = df1['Date'].dt.month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer

# 

In [61]:
df1.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow
5939,2009-01-01,Cobar,17.9,35.2,0.0,12.0,12.3,SSW,48.0,ENE,...,13.0,1006.3,1004.4,2.0,5.0,26.6,33.4,No,0.0,No
5940,2009-01-02,Cobar,18.4,28.9,0.0,14.8,13.0,S,37.0,SSE,...,8.0,1012.9,1012.1,1.0,1.0,20.3,27.0,No,0.0,No
5941,2009-01-03,Cobar,15.5,34.1,0.0,12.6,13.3,SE,30.0,N,...,7.0,1017.43818,1011.6,4.274694,1.0,17.608643,32.7,No,0.0,No
5942,2009-01-04,Cobar,19.4,37.6,0.0,10.8,10.6,NNE,46.0,NNE,...,22.0,1012.3,1009.2,1.0,6.0,28.7,34.9,No,0.0,No
5943,2009-01-05,Cobar,21.9,38.4,0.0,11.4,12.2,WNW,31.0,WNW,...,22.0,1012.7,1009.1,1.0,5.0,29.1,35.6,No,0.0,No


In [89]:

# Step 6: Train the model
model.fit(X_train, y_train)


TypeError: The DType <class 'numpy.dtype[datetime64]'> could not be promoted by <class 'numpy.dtype[float64]'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtype[datetime64]'>, <class 'numpy.dtype[int64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[int64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[int64]'>, <class 'numpy.dtype[int64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[int64]'>, <class 'numpy.dtype[float64]'>, <class 'numpy.dtype[int64]'>, <class 'numpy.dtype[int64]'>, <class 'numpy.dtype[int64]'>)

In [69]:

# Step 7: Predict on the test set
y_pred = model.predict(X_test)


In [72]:

# Step 8: Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')


Accuracy: 1.00


In [73]:

# Additional evaluation metrics
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     10942
           1       1.00      1.00      1.00      3142

    accuracy                           1.00     14084
   macro avg       1.00      1.00      1.00     14084
weighted avg       1.00      1.00      1.00     14084



In [74]:
df1.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow,Year,Month,Day
5939,2009-01-01,6,17.9,35.2,0.0,12.0,12.3,11,48.0,1,...,2.0,5.0,26.6,33.4,0,0.0,0,2009,1,1
5940,2009-01-02,6,18.4,28.9,0.0,14.8,13.0,8,37.0,10,...,1.0,1.0,20.3,27.0,0,0.0,0,2009,1,2
5941,2009-01-03,6,15.5,34.1,0.0,12.6,13.3,9,30.0,3,...,4.274694,1.0,17.608643,32.7,0,0.0,0,2009,1,3
5942,2009-01-04,6,19.4,37.6,0.0,10.8,10.6,5,46.0,5,...,1.0,6.0,28.7,34.9,0,0.0,0,2009,1,4
5943,2009-01-05,6,21.9,38.4,0.0,11.4,12.2,14,31.0,14,...,1.0,5.0,29.1,35.6,0,0.0,0,2009,1,5


In [75]:
df1.tail()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RISK_MM,RainTomorrow,Year,Month,Day
139108,2017-06-20,9,19.3,33.4,0.0,6.0,11.0,1,35.0,9,...,0.0,1.0,24.5,32.3,0,0.0,0,2017,6,20
139109,2017-06-21,9,21.2,32.6,0.0,7.6,8.6,0,37.0,9,...,7.0,0.0,24.8,32.0,0,0.0,0,2017,6,21
139110,2017-06-22,9,20.7,32.8,0.0,5.6,11.0,0,33.0,0,...,0.0,0.0,24.8,32.1,0,0.0,0,2017,6,22
139111,2017-06-23,9,19.5,31.8,0.0,6.2,10.6,2,26.0,9,...,1.0,1.0,24.8,29.2,0,0.0,0,2017,6,23
139112,2017-06-24,9,20.2,31.7,0.0,5.6,10.7,1,30.0,1,...,6.0,5.0,25.4,31.0,0,0.0,0,2017,6,24


In [78]:
for i in y_pred:
    print(i)

0
0
0
0
1
1
0
0
1
1
1
0
1
0
1
0
0
0
0
1
0
0
0
1
0
0
0
0
1
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
1
0
0
0
1
0
0
0
0
1
1
1
0
0
0
0
0
1
0
0
1
0
1
0
1
0
1
0
0
0
0
0
1
1
0
0
1
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
1
0
0
0
1
1
0
0
0
1
1
0
0
0
1
0
1
1
1
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
1
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
1
1
0
0
0
1
0
0
0
0
1
0
1
0
1
1
0
1
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
1
0
0
0
0
1
0
0
0
1
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
1
0
1
1
0
1
0
0
1
1
1
0
0
0
0
0
0
1
0
0
1
0
0
0
0
1
1
1
0
0
0
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
1
0
0
0
1
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
1
0
0
0
1
0
0
0
0
1
0
0
1
0
0
0
1
0
1
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
0
0
1
0
1
0
0
0
0
0
1
0
0
1
0
1
0
0
1
0
0
1
0
0
0
0
0
0
0
0
1
0
1
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
1
1
1
0
0
0
0
0
0
0
0
1
0
0
1
0
1
0
0
0
0
0
0
1
0
0
1
0
0
0
1
0
1
1
1
0
0
1
0
0
1
0
0
1
0
1
1
0
0
0
0
0
1
0
1
1
0
0
0
0
0
0
0
0
0
0
1
0
0
0
0
0
0
0
0
0
0
1
1


In [83]:
y_train

115684    0
76516     1
33831     1
80787     0
97439     1
         ..
82724     0
22884     0
108000    1
9250      0
39268     0
Name: RainTomorrow, Length: 56334, dtype: int64

In [84]:
#the model accuracy can never be 1 so i think there is somthing wrong with this model or my approach as i have neglected LOT of values , possible half of them because the sunshine and evaporation was mission for most of them