<a href="https://colab.research.google.com/github/abdulhameed04/Rainfall-Prediction-Project--Kaggle-Competition/blob/main/Rainfall_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# -----------------------------------------
# Install (if needed) and import libraries
# -----------------------------------------
!pip install xgboost --quiet

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from xgboost import XGBClassifier

# -----------------------------------------
# Load CSV files
# -----------------------------------------
# All three files are read from the current working directory, in this order:
#   train.csv             -> rows that include the 'rainfall' target
#   test.csv              -> rows to generate predictions for
#   sample_submission.csv -> template frame that later receives the predictions
train, test, submission = (
    pd.read_csv(csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

# -----------------------------------------
# Inspect (optional)
# -----------------------------------------
# Preview the first rows of both frames to eyeball the columns and values.
print(train.head())
print(test.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
train.info()

# -----------------------------------------
# Feature/target selection
# -----------------------------------------
# Features are every training column except the row identifier and the target.
X = train.drop(columns=['id', 'rainfall'])
y = train['rainfall']

# For test, drop only 'id' (the check below enforces that what remains matches
# the training feature columns).
X_test = test.drop(columns=['id'])

# Ensure columns match.
# An element-wise comparison of the two Index objects raises a ValueError when
# their lengths differ, which would hide the intended message. Compare by
# membership instead so that any mismatch is reported with the offending names.
test_columns = set(X_test.columns)
train_columns = set(X.columns)
missing_in_test = [col for col in X.columns if col not in test_columns]
unexpected_in_test = [col for col in X_test.columns if col not in train_columns]
assert not missing_in_test and not unexpected_in_test, (
    "Train and test columns do not match! "
    f"Missing from test: {missing_in_test}; unexpected in test: {unexpected_in_test}"
)

# The column sets are equal at this point but the order may differ; align the
# test frame to the training column order so both frames line up positionally.
X_test = X_test[X.columns]

# -----------------------------------------
# (Optional) Train/validation split to assess performance
# -----------------------------------------
# Split BEFORE imputing so the fill values below are derived from the training
# fold only. The split is stratified on the target and uses a fixed seed, so
# the row partition does not depend on whether missing values were filled yet.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.15, random_state=42, stratify=y
)

# -----------------------------------------
# Handle missing values (if any)
# -----------------------------------------
# Simple approach: fill with the per-column median.
# The medians come from the training fold and are reused for every frame:
#   * filling the test set with its own medians would preprocess it differently
#     from the data the model was fitted on;
#   * computing medians on the full training frame would let validation rows
#     influence the values used to fill the training fold.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_val = X_val.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# Keep the full feature frame free of missing values as well, in case it is
# used again further down the notebook.
X = X.fillna(train_medians)

# -----------------------------------------
# Model training with XGBoost
# -----------------------------------------
# Gradient-boosted tree classifier with a fixed seed so runs are repeatable.
# No class-imbalance option (such as scale_pos_weight) is configured here.
# `use_label_encoder` is intentionally not passed: the installed XGBoost ignores
# it and only emits a 'Parameters: { "use_label_encoder" } are not used.' warning.
model = XGBClassifier(
    random_state=42,
    n_estimators=200,
    max_depth=5,
    learning_rate=0.05,
    n_jobs=-1,
    eval_metric='logloss',
)

# Fit on the training fold only; the validation fold is held out for scoring.
model.fit(X_train, y_train)

# -----------------------------------------
# (Optional) Validation
# -----------------------------------------
# Score the held-out validation fold: overall accuracy first, then the
# per-class precision / recall / F1 table.
val_pred = model.predict(X_val)

val_accuracy = accuracy_score(y_val, val_pred)
print("Validation Accuracy:", val_accuracy)

val_report = classification_report(y_val, val_pred)
print(val_report)

# -----------------------------------------
# Predict on the test set
# -----------------------------------------
# NOTE(review): predict() yields hard class labels rather than probabilities —
# confirm that this is what the competition's scoring expects.
test_pred = model.predict(X_test)

# -----------------------------------------
# Prepare submission
# -----------------------------------------
# Cast the predictions to integers and store them in the template's 'rainfall'
# column. The values are assigned by position, so this presumes the template
# rows are in the same order as the test rows — TODO confirm.
rainfall_labels = test_pred.astype(int)
submission['rainfall'] = rainfall_labels

# Write the file without the DataFrame index so only the template columns appear.
submission.to_csv('rainfall_submission.csv', index=False)
print("Submission file 'rainfall_submission.csv' is ready!")


   id  day  pressure  maxtemp  temparature  mintemp  dewpoint  humidity  \
0   0    1    1017.4     21.2         20.6     19.9      19.4      87.0   
1   1    2    1019.5     16.2         16.9     15.8      15.4      95.0   
2   2    3    1024.1     19.4         16.1     14.6       9.3      75.0   
3   3    4    1013.4     18.1         17.8     16.9      16.8      95.0   
4   4    5    1021.8     21.3         18.4     15.2       9.6      52.0   

   cloud  sunshine  winddirection  windspeed  rainfall  
0   88.0       1.1           60.0       17.2         1  
1   91.0       0.0           50.0       21.9         1  
2   47.0       8.3           70.0       18.1         1  
3   95.0       0.0           60.0       35.6         1  
4   45.0       3.6           40.0       24.8         0  
     id  day  pressure  maxtemp  temparature  mintemp  dewpoint  humidity  \
0  2190    1    1019.5     17.5         15.8     12.7      14.9      96.0   
1  2191    2    1016.5     17.5         16.5     15.8

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


Validation Accuracy: 0.8571428571428571
              precision    recall  f1-score   support

           0       0.74      0.64      0.69        81
           1       0.89      0.93      0.91       248

    accuracy                           0.86       329
   macro avg       0.82      0.78      0.80       329
weighted avg       0.85      0.86      0.85       329

Submission file 'rainfall_submission.csv' is ready!
