<a href="https://colab.research.google.com/github/Vreddy0305/Machine-Learning-ML-/blob/lab4/LAB4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns

# 2. Data Loader for USGS-style files
def load_usgs(file, colname):
    """Load one USGS-style tab-separated export as a two-column frame.

    The input is read as six tab-separated fields (agency, site, datetime,
    tz, value, qualifier) with no header row; only the timestamp and the
    value are kept.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        colname: Name given to the measurement column in the result.

    Returns:
        DataFrame with a parsed ``datetime`` column and a numeric
        ``colname`` column. Rows whose timestamp cannot be parsed are
        dropped; values that are not numeric are kept as NaN.
    """
    df = pd.read_csv(
        file,
        sep='\t',
        header=None,
        names=['agency', 'site', 'datetime', 'tz', colname, 'qual'],
        # Raw USGS downloads usually begin with '#'-prefixed metadata lines.
        comment='#',
    )
    df = df[['datetime', colname]].copy()
    # Coerce rather than raise, mirroring the value column below: a
    # column-name or field-format row left in the file becomes NaT here
    # instead of aborting the whole load.
    df['datetime'] = pd.to_datetime(df['datetime'], errors='coerce')
    df[colname] = pd.to_numeric(df[colname], errors='coerce')
    # Rows without a timestamp cannot be aligned with the other series, and
    # missing keys would pair up with each other in a later merge.
    return df.dropna(subset=['datetime']).reset_index(drop=True)

# 3. Load the four measurement files and join them on timestamp
#    (adjust the paths if the files are not under /content/)
stream = load_usgs('/content/stream_flow.csv', 'stream_flow')
precip = load_usgs('/content/precipitation.csv', 'precipitation')
gage = load_usgs('/content/gage_height.csv', 'gage_height')
storage = load_usgs('/content/reservoir_storage.csv', 'reservoir_storage')

# Start from stream flow and fold the other series in one at a time; the
# default inner join keeps only timestamps present in every series.
df = stream
for other in (precip, gage, storage):
    df = df.merge(other, on='datetime')

# Discard rows with any missing value, then renumber the index from zero.
df = df.dropna()
df = df.reset_index(drop=True)

# 4. Binary target: 1 when stream flow is strictly above the median, else 0
median_flow = df['stream_flow'].median()
df['high_flow_class'] = df['stream_flow'].gt(median_flow).astype(int)

# 5. Separate the predictor columns from the label column
target = 'high_flow_class'
features = ['precipitation', 'gage_height', 'reservoir_storage']
X, y = df[features], df[target]

# 6. Stratified 70/30 split, then standardize the features
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.3, random_state=42)

# The scaler learns its statistics from the training split only and is then
# applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 7. Baseline kNN (k=3): fit on the scaled training data, report metrics
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)
y_pred_train = knn.predict(X_train_scaled)
y_pred = knn.predict(X_test_scaled)

# Compute every metric first so the reporting below is just printing.
test_cm = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
test_accuracy = accuracy_score(y_test, y_pred)
train_accuracy = accuracy_score(y_train, y_pred_train)

print("\nConfusion Matrix (Test set):\n", test_cm)
print("\nClassification Report (Test set):\n", test_report)
print(f"Accuracy (Test): {test_accuracy:.3f}")
print(f"Accuracy (Train): {train_accuracy:.3f}")

# 8. Sweep k from 1 to 20 and compare train/test accuracy
k_list = list(range(1, 21))
train_acc, test_acc = [], []
for k in k_list:
    knn_k = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)
    train_acc.append(knn_k.score(X_train_scaled, y_train))
    test_acc.append(knn_k.score(X_test_scaled, y_test))

# One figure, two curves: circles for train, squares for test.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(k_list, train_acc, 'o-', label='Train Accuracy')
ax.plot(k_list, test_acc, 's-', label='Test Accuracy')
ax.set_xlabel('k (Number of Neighbors)')
ax.set_ylabel('Accuracy')
ax.set_title('Accuracy vs k for kNN')
ax.legend()
plt.show()

# 9. Cross-validated search for k: the full grid, then a random sample of it
param_grid = {'n_neighbors': list(range(1, 21))}

print("\nRunning GridSearchCV...")
grid = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    cv=5,
)
grid.fit(X_train_scaled, y_train)
best_k = grid.best_params_['n_neighbors']
print("Best k by GridSearchCV:", best_k)
print("Best CV score:", grid.best_score_)

print("\nRunning RandomizedSearchCV...")
# Draws 10 of the 20 candidate k values; the seed keeps the draw repeatable.
rand = RandomizedSearchCV(
    estimator=KNeighborsClassifier(),
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    random_state=42,
)
rand.fit(X_train_scaled, y_train)
rand_best_k = rand.best_params_['n_neighbors']
print("Best k by RandomizedSearchCV:", rand_best_k)
print("Best CV score:", rand.best_score_)

# 10. Scatter one feature pair coloured by class (edit the pair to try others)
features_plot = ['gage_height', 'reservoir_storage']
x_feature, y_feature = features_plot

fig, ax = plt.subplots(figsize=(6, 5))
sns.scatterplot(
    data=df,
    x=x_feature,
    y=y_feature,
    hue='high_flow_class',
    palette='coolwarm',
    alpha=0.6,
    ax=ax,
)
ax.set_title('Class Separation (Train+Test)')
plt.show()
