In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib
import sqlite3

In [29]:
# Load the dataset
data = pd.read_csv('/Users/anujjainbatu/Desktop/flood-prediction/data/FloodPrediction.csv')

# Handle missing values
data = data.dropna()

# Encode categorical variables
label_encoder = LabelEncoder()
data['Station_Names'] = label_encoder.fit_transform(data['Station_Names'])

# Normalize the data
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data.drop('Flood?', axis=1))
data_scaled = pd.DataFrame(data_scaled, columns=data.columns[:-1])
data_scaled['Flood?'] = data['Flood?'].values

# Define features and target variable
X = data_scaled.drop('Flood?', axis=1)
y = data_scaled['Flood?']

In [30]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the model
model = LogisticRegression()

# Train the model
model.fit(X_train, y_train)

# Save the model
joblib.dump(model, '/Users/anujjainbatu/Desktop/flood-prediction/models/flood_prediction_model.pkl')

['/Users/anujjainbatu/Desktop/flood-prediction/models/flood_prediction_model.pkl']

In [31]:
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=1)
recall = recall_score(y_test, y_pred, zero_division=1)
f1 = f1_score(y_test, y_pred, zero_division=1)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

Accuracy: 0.9922135706340378
Precision: 0.9939172749391727
Recall: 0.9975579975579976
F1 Score: 0.9957343083485679


In [51]:
from sklearn.preprocessing import LabelEncoder
import numpy as np

class CustomLabelEncoder(LabelEncoder):
    def fit(self, y):
        super().fit(y)
        self.classes_ = np.append(self.classes_, 'unknown')
        return self

    def transform(self, y):
        unknown_label = len(self.classes_) - 1
        y = np.array([x if x in self.classes_ else 'unknown' for x in y])
        return super().transform(y)

    def fit_transform(self, y):
        return self.fit(y).transform(y)

# Connect to SQLite database
conn = sqlite3.connect('/Users/anujjainbatu/Desktop/flood-prediction/data/flood_data.db')
cursor = conn.cursor()

# Fetch data from the database
cursor.execute('SELECT Sl, Station_Names, Year, Month, Max_Temp, Min_Temp, Rainfall, Relative_Humidity, Wind_Speed, Cloud_Coverage, Bright_Sunshine, Station_Number, X_COR, Y_COR, LATITUDE, LONGITUDE, ALT, Period FROM flood_data WHERE Flood IS NULL')
rows = cursor.fetchall()

# Convert to DataFrame
columns = ['Sl', 'Station_Names', 'Year', 'Month', 'Max_Temp', 'Min_Temp', 'Rainfall', 'Relative_Humidity', 'Wind_Speed', 'Cloud_Coverage', 'Bright_Sunshine', 'Station_Number', 'X_COR', 'Y_COR', 'LATITUDE', 'LONGITUDE', 'ALT', 'Period']
operational_data = pd.DataFrame(rows, columns=columns)

# Handle missing values
operational_data = operational_data.dropna()

# Initialize the custom label encoder
label_encoder = CustomLabelEncoder()
operational_data['Station_Names'] = label_encoder.fit_transform(operational_data['Station_Names'])

# Encode categorical variables
operational_data['Station_Names'] = label_encoder.transform(operational_data['Station_Names'])


In [45]:
# Load the model
model = joblib.load('/Users/anujjainbatu/Desktop/flood-prediction/models/flood_prediction_model.pkl')

# Make predictions
predictions = model.predict(operational_data)

# Print predictions
print("Predictions:", predictions)

Predictions: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [46]:
# List of required features
required_features = ['Station_Names', 'Year', 'Month', 'Max_Temp', 'Min_Temp', 'Rainfall', 'Relative_Humidity', 'Wind_Speed', 'Cloud_Coverage', 'Bright_Sunshine', 'Station_Number', 'X_COR', 'Y_COR', 'LATITUDE', 'LONGITUDE', 'ALT']

# List current features
current_features = operational_data.columns.tolist()
current_features.remove('Sl')
current_features.remove('Period')

# Compare features
missing_features = set(required_features) - set(current_features)

In [47]:
# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=1)
recall = recall_score(y_test, y_pred, zero_division=1)
f1 = f1_score(y_test, y_pred, zero_division=1)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

Accuracy: 0.9922135706340378
Precision: 0.9939172749391727
Recall: 0.9975579975579976
F1 Score: 0.9957343083485679


In [52]:
# Load the model
model = joblib.load('/Users/anujjainbatu/Desktop/flood-prediction/models/flood_prediction_model.pkl')

# Make predictions
predictions = model.predict(operational_data)

# Add predictions to the operational data
operational_data['Flood'] = predictions

# Save predictions back to the database
for index, row in operational_data.iterrows():
    cursor.execute('''
        UPDATE flood_data
        SET Flood = ?
        WHERE Sl = ?
    ''', (row['Flood'], row['Sl']))

# Commit the changes and close the connection
conn.commit()
conn.close()

print("Predictions saved to the database.")

Predictions saved to the database.
