In [1]:
#import necessary libraries
import pandas as pd 
import numpy as np
from itertools import chain 
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

#function to create a flat list from a string
def Create_List(x):
    """Break a comma-separated description into a flat list of single words.

    The string is first cut at every comma, then each piece is cut on
    whitespace, so ``"Freezing Rain,Fog"`` yields
    ``['Freezing', 'Rain', 'Fog']``.

    Args:
        x: The string to tokenize.

    Returns:
        list: All words of ``x`` in their original order; empty when ``x``
        contains no non-whitespace text between commas.
    """
    words = []
    for segment in x.split(','):
        # split() without arguments also discards leading/trailing and
        # repeated whitespace, so empty segments contribute nothing.
        words.extend(segment.split())
    return words
    
#function to categorize weather conditions
def Get_Weather(l1):
    """Map a collection of weather words onto one standardized label.

    Rules are tried from top to bottom and the first rule whose words are
    all present in ``l1`` wins, so combined conditions take precedence over
    single ones. Membership is tested with ``in``, so matching is exact and
    case-sensitive.

    Args:
        l1: Container of weather words (for example the list produced by
            ``Create_List``).

    Returns:
        str: One of ``'RAIN+FOG'``, ``'SNOW+RAIN'``, ``'SNOW'``, ``'FOG'``,
        ``'Clear'``, ``'Cloudy'``; ``'RAIN'`` when no rule matches.
    """
    # (words that must all be present, label to return), in priority order.
    rules = (
        (('Fog', 'Rain'), 'RAIN+FOG'),
        (('Snow', 'Rain'), 'SNOW+RAIN'),
        (('Snow',), 'SNOW'),
        (('Fog',), 'FOG'),
        (('Clear',), 'Clear'),
        (('Cloudy',), 'Cloudy'),
    )
    for required, label in rules:
        if all(word in l1 for word in required):
            return label
    # Fallback bucket: anything not matched above is labelled as rain.
    return 'RAIN'
        
#Weather categorization and sampling to balance classes
def preprocess_data(data, sample_size=600, random_state=None):
    """Label each row with a standardized weather class and balance the classes.

    A ``Std_Weather`` label is derived from the ``Weather`` column via
    ``Create_List`` and ``Get_Weather``. The ``'Cloudy'`` and ``'Clear'``
    classes are randomly down-sampled, the ``'RAIN'`` and ``'SNOW'`` classes
    are kept whole, and rows with any other label are left out of the
    result. The ``Date/Time`` and ``Weather`` columns are then removed.

    The input frame is not modified.

    Args:
        data: DataFrame containing at least the ``Weather`` and
            ``Date/Time`` columns.
        sample_size: Maximum number of rows kept for each of the
            ``'Cloudy'`` and ``'Clear'`` classes. A class with fewer rows
            is kept whole instead of raising.
        random_state: Seed forwarded to ``DataFrame.sample``; ``None``
            keeps the sampling unseeded.

    Returns:
        pandas.DataFrame: The selected rows with the remaining original
        columns plus ``Std_Weather``.

    Raises:
        KeyError: If ``Weather`` or ``Date/Time`` is missing from ``data``.
    """
    std_weather = data['Weather'].apply(lambda x: Get_Weather(Create_List(x)))
    # assign() returns a new frame, so the caller's DataFrame is left untouched.
    labelled = data.assign(Std_Weather=std_weather)

    def _rows_for(label):
        return labelled[labelled['Std_Weather'] == label]

    def _downsample(label):
        subset = _rows_for(label)
        # sample() raises ValueError when asked for more rows than exist
        # (without replacement), so cap the request at the class size.
        return subset.sample(min(sample_size, len(subset)), random_state=random_state)

    weather_df = pd.concat(
        [_downsample('Cloudy'), _downsample('Clear'), _rows_for('RAIN'), _rows_for('SNOW')],
        axis=0,
    )
    return weather_df.drop(columns=['Date/Time', 'Weather'])
    
# Splitting of data, scaling of features, training a RandomForestClassifier and evaluation of model
def train_model(weather_df, random_state=42):
    """Train and evaluate a random-forest weather classifier.

    The frame is split 80/20 into train and test rows. A ``StandardScaler``
    is fit on the training rows only and then applied to the test rows, so
    no test-set statistics reach the preprocessing step. Hold-out accuracy
    and a classification report are printed, followed by 5-fold
    cross-validation scores. The cross-validation runs a scaler+forest
    pipeline on the unscaled features so the scaler is refit inside every
    fold.

    Args:
        weather_df: DataFrame with a ``Std_Weather`` target column; every
            other column is used as a feature (assumed numeric).
        random_state: Seed used for both the train/test split and the
            forest, making the printed metrics reproducible.

    Returns:
        tuple: ``(rf_model, std_scaler)`` - the forest fitted on the scaled
        training rows and the scaler fitted on those same rows.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    from sklearn.base import clone
    from sklearn.pipeline import make_pipeline

    X = weather_df.drop(columns=['Std_Weather'])
    Y = weather_df['Std_Weather']

    # Split BEFORE scaling: fitting the scaler on all rows would leak the
    # test rows' mean/variance into training.
    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.2, random_state=random_state
    )

    std_scaler = StandardScaler()
    x_train_std = std_scaler.fit_transform(x_train)
    x_test_std = std_scaler.transform(x_test)

    rf_model = RandomForestClassifier(
        n_estimators=50, max_features='log2', random_state=random_state
    )
    rf_model.fit(x_train_std, y_train)

    y_pred_rf = rf_model.predict(x_test_std)
    print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
    print(classification_report(y_test, y_pred_rf))

    # clone() yields an unfitted copy with the same hyper-parameters, so the
    # cross-validation never touches the model returned to the caller.
    cv_estimator = make_pipeline(StandardScaler(), clone(rf_model))
    scores = cross_val_score(cv_estimator, X, Y, cv=5, scoring='accuracy')
    print("Cross-validation scores:", scores)
    print("Mean cross-validation score:", scores.mean())

    return rf_model, std_scaler
    
#Prediction of Weather based on user input
def predict_weather(rf_model, std_scaler, temp, dew_point, humidity, wind_speed, visibility, pressure):
    """Predict the weather label for a single observation.

    The six readings are placed in a one-row DataFrame under fixed column
    names, passed through ``std_scaler.transform`` and then through
    ``rf_model.predict``.

    Args:
        rf_model: Object exposing ``predict``.
        std_scaler: Object exposing ``transform``.
        temp: Value stored in the ``Temp_C`` column.
        dew_point: Value stored in the ``Dew Point Temp_C`` column.
        humidity: Value stored in the ``Rel Hum_%`` column.
        wind_speed: Value stored in the ``Wind Speed_km/h`` column.
        visibility: Value stored in the ``Visibility_km`` column.
        pressure: Value stored in the ``Press_kPa`` column.

    Returns:
        The first element of the model's prediction.
    """
    feature_names = [
        'Temp_C',
        'Dew Point Temp_C',
        'Rel Hum_%',
        'Wind Speed_km/h',
        'Visibility_km',
        'Press_kPa',
    ]
    observation = [temp, dew_point, humidity, wind_speed, visibility, pressure]
    single_row = pd.DataFrame([observation], columns=feature_names)
    scaled_row = std_scaler.transform(single_row)
    return rf_model.predict(scaled_row)[0]
     
#Main Function
def main(data_path='C:/Users/LENOVO/Downloads/Weather_Data.csv'):
    """Train the weather model, then predict from readings typed by the user.

    The CSV at ``data_path`` is loaded, preprocessed with
    ``preprocess_data`` and used by ``train_model``. The user is then asked
    for six numeric readings and the predicted weather label is printed.

    Args:
        data_path: Location of the weather CSV. The default is the path the
            script was originally written against; pass another path to run
            it elsewhere.
    """
    data = pd.read_csv(data_path)
    weather_df = preprocess_data(data)
    rf_model, std_scaler = train_model(weather_df)

    def _read_float(prompt):
        # Ask again on invalid input so one typo does not throw away the
        # model that was just trained.
        while True:
            try:
                return float(input(prompt))
            except ValueError:
                print("Invalid number, please try again.")

    # User input for weather prediction
    temp = _read_float("Enter temperature (in Celsius): ")
    dew_point = _read_float("Enter dew point temperature (in Celsius): ")
    humidity = _read_float("Enter relative humidity (in %): ")
    wind_speed = _read_float("Enter wind speed (in km/h): ")
    visibility = _read_float("Enter visibility (in km): ")
    pressure = _read_float("Enter pressure (in kPa): ")

    # Weather prediction based on user input
    user_prediction = predict_weather(rf_model, std_scaler, temp, dew_point, humidity, wind_speed, visibility, pressure)
    print("Predicted Weather Condition:", user_prediction)

# Run the interactive train-and-predict flow only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

Random Forest Accuracy: 0.663135593220339
              precision    recall  f1-score   support

       Clear       0.60      0.69      0.64       118
      Cloudy       0.53      0.43      0.47       128
        RAIN       0.74      0.77      0.75       126
        SNOW       0.78      0.80      0.79       100

    accuracy                           0.66       472
   macro avg       0.66      0.67      0.67       472
weighted avg       0.66      0.66      0.66       472

Cross-validation scores: [0.58050847 0.62288136 0.64194915 0.59957627 0.65605096]
Mean cross-validation score: 0.6201932419302602


Enter temperature (in Celsius):  34
Enter dew point temperature (in Celsius):  12
Enter relative humidity (in %):  57
Enter wind speed (in km/h):  35
Enter visibility (in km):  36
Enter pressure (in kPa):  345


Predicted Weather Condition: Cloudy
