In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.impute import KNNImputer

In [None]:
# Read the hackathon train/test tables and discard the "Unnamed: 0" column when
# it is present (errors='ignore' makes the drop a no-op otherwise).
train = pd.read_csv(
    "/kaggle/input/summer-analytics-mid-hackathon/hacktrain.csv"
).drop(columns=["Unnamed: 0"], errors='ignore')
test = pd.read_csv(
    "/kaggle/input/summer-analytics-mid-hackathon/hacktest.csv"
).drop(columns=["Unnamed: 0"], errors='ignore')

# Columns of the training table whose name contains "_N"; the rest of the
# notebook uses this list as the set of NDVI feature columns.
ndvi_cols = list(filter(lambda name: "_N" in name, train.columns))



In [None]:
# Clamp the NDVI readings to [-1000, 1000] in BOTH frames so that train and
# test go through identical preprocessing before feature engineering.
train[ndvi_cols] = train[ndvi_cols].clip(lower=-1000, upper=1000)
test[ndvi_cols] = test[ndvi_cols].clip(lower=-1000, upper=1000)

# Fill missing NDVI values from the 3 nearest rows. The imputer is fitted on
# the training data only; the test frame is transformed with that fitted
# imputer so no test information influences the fit and no NaNs are left to
# reach the model at prediction time.
knn_imputer = KNNImputer(n_neighbors=3)
train[ndvi_cols] = knn_imputer.fit_transform(train[ndvi_cols])
test[ndvi_cols] = knn_imputer.transform(test[ndvi_cols])

# Missing-value map of the training frame after imputation (visual check).
sns.heatmap(train.isnull(), cmap='Blues', cbar=False, yticklabels=False, xticklabels=train.columns);

In [None]:
# Bar chart of each class's share of the training rows.
(
    train['class']
    .value_counts(normalize=True)
    .plot.bar(title="Class Distribution")
)


In [None]:
def add_features(df, cols=None):
    """Append row-wise NDVI summary statistics to ``df`` and return it.

    The frame is modified in place (six new columns are added) and the same
    object is returned, so ``df = add_features(df)`` and a bare
    ``add_features(df)`` are equivalent.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns listed in ``cols``.
    cols : sequence of str, optional
        Columns to summarise per row. Defaults to the notebook-level
        ``ndvi_cols`` list, which keeps existing one-argument calls working.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself, with ``ndvi_mean``, ``ndvi_std``, ``ndvi_min``,
        ``ndvi_max``, ``ndvi_range`` and ``ndvi_median`` added.
    """
    if cols is None:
        # Fall back to the global column list built when the data was loaded.
        cols = ndvi_cols

    # Slice once so every statistic is computed from the same snapshot and the
    # newly added columns can never leak into later aggregates.
    values = df[list(cols)]

    df['ndvi_mean'] = values.mean(axis=1)
    df['ndvi_std'] = values.std(axis=1)  # pandas default: sample std (ddof=1)
    df['ndvi_min'] = values.min(axis=1)
    df['ndvi_max'] = values.max(axis=1)
    df['ndvi_range'] = df['ndvi_max'] - df['ndvi_min']
    df['ndvi_median'] = values.median(axis=1)
    return df

# Attach the engineered NDVI summary columns to both frames (train first).
train, test = [add_features(frame) for frame in (train, test)]

In [None]:
# Feature matrix: raw NDVI readings plus the engineered summary statistics.
feature_cols = ndvi_cols + ['ndvi_mean', 'ndvi_std', 'ndvi_min', 'ndvi_max', 'ndvi_range',
                            'ndvi_median']

# Train-validation split on the unscaled features, so the scaler below is
# fitted without ever seeing the validation rows.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    train[feature_cols], train['class'], test_size=0.2, random_state=42
)

# Standardise features. Previously `X_scaled` / `X_test_scaled` were used
# without being defined anywhere, which raised NameError on a fresh kernel.
# The scaler is fitted on the training split only and then reused for the
# validation and test features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_test_scaled = scaler.transform(test[feature_cols])

# Train model
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# Validation
val_preds = model.predict(X_val)
acc = accuracy_score(y_val, val_preds)
print("Validation Accuracy:", round(acc * 100, 2), "%")

# Predict on test set
test_preds = model.predict(X_test_scaled)

# Submission
submission = pd.DataFrame({'ID': test['ID'], 'class': test_preds})
submission.to_csv("submission.csv", index=False)