In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns

df = pd.read_csv("hacktrain.csv")

df.isnull().sum()

df.fillna(df.mean(numeric_only=True), inplace=True)

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report


# Encode class column (if it's categorical)
label_encoder = LabelEncoder()
df['class'] = label_encoder.fit_transform(df['class'])

# Split into features and target
X = df.drop(columns=['class'])
X = X.drop(columns=['Unnamed: 0', 'ID'])
y = df['class']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=26, stratify=y)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit multinomial logistic regression
model = LogisticRegression(
    solver='lbfgs',
    max_iter=1000
)
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Classification report with all original class labels
print(classification_report(
    y_test,
    y_pred,
    labels=list(range(len(label_encoder.classes_))),
    target_names=label_encoder.classes_
))



              precision    recall  f1-score   support

        farm       0.79      0.58      0.67        84
      forest       0.91      0.98      0.94       616
       grass       0.50      0.20      0.29        20
  impervious       0.89      0.82      0.85        67
     orchard       1.00      0.33      0.50         3
       water       0.83      0.50      0.62        10

    accuracy                           0.90       800
   macro avg       0.82      0.57      0.65       800
weighted avg       0.89      0.90      0.88       800



In [3]:
# Test the model
test_data = pd.read_csv("hacktest.csv")
ndvi_data = test_data.drop(columns=['ID', 'Unnamed: 0'])
# scaler = StandardScaler()
ndvi_data = scaler.fit_transform(ndvi_data)


y_test = model.predict(ndvi_data)
y_decoded = label_encoder.inverse_transform(y_test)
result = pd.DataFrame({
    'ID': test_data['ID'],
    'class': y_decoded
})
result

Unnamed: 0,ID,class
0,1,orchard
1,2,forest
2,3,orchard
3,4,forest
4,5,forest
...,...,...
2840,2841,water
2841,2842,impervious
2842,2843,water
2843,2844,impervious


In [4]:
result.to_csv("Hackathon1 result.csv", index=False)