In [None]:
import pandas as pd
import numpy as np


# Load the raw weather observations; the frame is imputed in place below.
df = pd.read_csv('weather.csv')

# rain
# Describe the observed (non-missing) rain values with a mean and a standard
# deviation; missing entries are replaced by draws from that normal distribution.
non_missing_rain = df['rain'].dropna()

rain_mean = non_missing_rain.mean()
rain_std = non_missing_rain.std()

# Fixed seed so the imputed values are reproducible from run to run.
np.random.seed(42)

# Fill every missing rain value in one masked assignment instead of writing
# into the frame while iterating over it with iterrows().
rain_missing = df['rain'].isna()
if rain_missing.any():
    rain_fill = np.random.normal(
        loc=rain_mean,
        scale=rain_std,
        size=int(rain_missing.sum()),
    )
    # Clamp the draws to [0, 1] and keep four decimals.
    # NOTE(review): the [0, 1] bounds presume rain is recorded on a 0-1 scale — confirm.
    rain_fill = np.round(np.clip(rain_fill, 0, 1), 4)
    df.loc[rain_missing, 'rain'] = rain_fill


def impute_column_with_random_values(column_name, frame=None, seed=42):
    """Fill the missing values of one column with seeded uniform random draws.

    Each missing entry is replaced by a value drawn uniformly between the
    column's observed minimum and maximum, rounded to two decimals. The
    frame is modified in place.

    Parameters
    ----------
    column_name : str
        Name of the column to impute.
    frame : pandas.DataFrame, optional
        Frame to modify. When omitted, the notebook-level ``df`` is used.
    seed : int, optional
        Seed passed to ``np.random.seed`` before drawing (default 42).

    Returns
    -------
    None

    Notes
    -----
    The global NumPy generator is re-seeded on every call, so calling this
    function for several columns with the same seed reuses the same sequence
    of uniform draws for each of them (only the min/max scaling differs).
    Pass a different ``seed`` per column if independent draws are wanted.
    """
    if frame is None:
        frame = df  # notebook-level frame

    column = frame[column_name]
    col_min = column.min()
    col_max = column.max()

    # Seed unconditionally so the global generator state after the call does
    # not depend on whether anything was missing.
    np.random.seed(seed)

    missing = column.isna()
    if not missing.any():
        return

    # One vectorized draw for all missing cells rather than mutating the
    # frame while iterating over it row by row.
    draws = np.random.uniform(low=col_min, high=col_max, size=int(missing.sum()))
    frame.loc[missing, column_name] = np.round(draws, 2)

# Columns whose gaps are filled with uniform random values between the
# column's own minimum and maximum.
columns_to_impute = [
    'temp',
    'clouds',
    'pressure',
    'humidity',
    'wind',
]

# Impute each listed column in turn; the helper edits `df` in place.
for column in columns_to_impute:
    impute_column_with_random_values(column)

# Persist the fully imputed frame (without the index column).
df.to_csv('updated_dataset.csv', index=False)


In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


# Reload the imputed dataset written to disk earlier.
df = pd.read_csv('updated_dataset.csv')

# Features rescaled to the [0, 1] range; all other columns stay as loaded.
# NOTE(review): 'clouds' is not in this list although it is imputed and later
# weighted alongside these features — confirm it is already on a 0-1 scale.
columns_to_normalize = ['temp', 'pressure', 'rain', 'humidity', 'wind']

scaler = MinMaxScaler(feature_range=(0, 1))

# Fit on the selected columns and write the scaled values back,
# then keep four decimals.
scaled_values = scaler.fit_transform(df[columns_to_normalize])
df[columns_to_normalize] = scaled_values
df[columns_to_normalize] = df[columns_to_normalize].round(4)

# Persist the normalized frame (without the index column).
df.to_csv('normalized_dataset.csv', index=False)


In [None]:
import pandas as pd

# Read the normalized dataset back from disk.
df = pd.read_csv('normalized_dataset.csv')

# Weight of each weather feature in the surge computation.
w_temp = 0.3
w_pressure = 0.2
w_rain = 0.4
w_humidity = 0.1
w_clouds = 0.3
w_wind = 0.2

# Column name -> weight, listed in the order the terms are added up.
feature_weights = {
    'temp': w_temp,
    'clouds': w_clouds,
    'pressure': w_pressure,
    'rain': w_rain,
    'humidity': w_humidity,
    'wind': w_wind,
}

# Surge factor: 1 plus half of the weighted feature sum, to four decimals.
weighted_sum = sum(weight * df[feature] for feature, weight in feature_weights.items())
surge = (1 + weighted_sum / 2).round(4)

# Keep the result inside [1, 2].
df['surge_factor'] = surge.clip(lower=1, upper=2)

# Write the frame, now including the surge_factor column, to a new CSV file.
df.to_csv('finalweather.csv', index=False)

# Confirmation message.
# NOTE(review): the message spells the file name in upper case, while the
# path written above is 'finalweather.csv'.
print("DataFrame with surge factor saved to FINALWEATHER.csv")


DataFrame with surge factor saved to FINALWEATHER.csv


In [None]:
import pandas as pd

# Input files: df1 is tiled to the length of df2, then the two are placed side by side.
csv1_path = 'dynamic.csv'
csv2_path = 'finalweather.csv'

df1, df2 = pd.read_csv(csv1_path), pd.read_csv(csv2_path)

# One more copy than the integer quotient, so the tiled frame is never
# shorter than df2.
# NOTE: this division raises ZeroDivisionError when df1 has no rows.
repeat_factor = (len(df2) // len(df1)) + 1

# Stack the copies with a fresh 0..n-1 index and trim to df2's length.
tiled = pd.concat([df1] * repeat_factor, ignore_index=True)
df1_repeated = tiled.iloc[:len(df2)]

# Column-wise concatenation; rows are matched on the index.
combined_df = pd.concat([df1_repeated, df2], axis=1)

# Write the combined frame (without the index column) and report the path.
output_file_path = 'combineddf1df2.csv'
combined_df.to_csv(output_file_path, index=False)

print(f'Saved combined data to: {output_file_path}')


Saved combined data to: combineddf1df2.csv
