In [None]:
import pandas as pd

# Locations of the training and test CSV files
train_path = 'hacktrain.csv'
test_path = 'hacktest.csv'

# Read both datasets (training first, then test) into DataFrames
train_df, test_df = (pd.read_csv(csv_file) for csv_file in (train_path, test_path))

# DataFrame.info() prints its summary and returns None, so both *_info names
# end up holding None; the calls are made for their printed output.
train_info, test_info = train_df.info(), test_df.info()

# First rows of each dataset, kept for the cell output below
train_head, test_head = train_df.head(), test_df.head()

# Cell output: class distribution, both shapes, and the two previews
(
    train_df['class'].value_counts(),
    train_df.shape,
    test_df.shape,
    train_head,
    test_head,
)

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

# Drop the 'Unnamed: 0' column from both frames.
# The drop is not done in place and tolerates a missing column, so re-running
# this cell no longer raises KeyError once the column is already gone.
train_df = train_df.drop(columns=['Unnamed: 0'], errors='ignore')
test_df = test_df.drop(columns=['Unnamed: 0'], errors='ignore')

# Feature columns are those whose name contains '_N'
ndvi_columns = [col for col in train_df.columns if '_N' in col]

# Fill missing feature values with the per-column mean learned from the
# training data; the fitted `imputer` is reused on the test set later.
# NOTE(review): the imputer is fitted on every training row, including the rows
# that later form the validation split, so validation accuracy may be slightly
# optimistic — consider fitting it inside a Pipeline after the split.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(train_df[ndvi_columns])
# Add statistical features
def add_statistical_features(X, feature_names):
    """Append per-row summary statistics to a feature matrix.

    Every statistic is computed from the original feature columns only.
    (Previously each statistic was computed on a frame that already contained
    the statistics added before it, so e.g. ``min_ndvi`` could pick up the
    value of ``std_ndvi`` instead of a real feature value.)

    Parameters
    ----------
    X : array-like of shape (n_rows, n_features)
        Feature values; anything accepted by ``pd.DataFrame``.
    feature_names : list of str
        Column names for ``X``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns followed by ``mean_ndvi``,
        ``std_ndvi``, ``max_ndvi``, ``min_ndvi`` and ``range_ndvi``
        (``max_ndvi - min_ndvi``), in that order.

    Notes
    -----
    ``std_ndvi`` is the pandas default sample standard deviation (ddof=1),
    which is NaN for a row with fewer than two values.
    """
    base = pd.DataFrame(X, columns=feature_names)

    # Compute the extremes once so the range reuses them.
    row_max = base.max(axis=1)
    row_min = base.min(axis=1)

    # assign() returns a new frame, so `base` stays free of derived columns
    # while the statistics are being evaluated.
    return base.assign(
        mean_ndvi=base.mean(axis=1),
        std_ndvi=base.std(axis=1),
        max_ndvi=row_max,
        min_ndvi=row_min,
        range_ndvi=row_max - row_min,
    )

X_train_features = add_statistical_features(X_train_imputed, ndvi_columns)

# Encode target labels as integers; `le` is kept so predictions can be mapped
# back to the original class names.
le = LabelEncoder()
y_train = le.fit_transform(train_df['class'])

# Hold out 20% of the rows for internal evaluation BEFORE scaling, so the
# scaler's mean/variance are learned from the fitting rows only and no
# validation information leaks into the model inputs.
X_tr_unscaled, X_val_unscaled, y_tr, y_val = train_test_split(
    X_train_features, y_train, test_size=0.2, random_state=42
)

# Scale features: fit on the training split, then apply to the validation split
scaler = StandardScaler()
X_tr = scaler.fit_transform(X_tr_unscaled)
X_val = scaler.transform(X_val_unscaled)

# Full training matrix under the same scaling; kept so the name
# `X_train_scaled` remains available to any code that uses it.
X_train_scaled = scaler.transform(X_train_features)

# Train logistic regression model.
# - random_state pins saga's data shuffling so results are reproducible.
# - The deprecated `multi_class='multinomial'` argument is omitted; for three
#   or more classes saga fits a multinomial model by default.
#   NOTE(review): assumes the target has 3+ classes — confirm.
model = LogisticRegression(max_iter=1000, solver='saga', random_state=42)
model.fit(X_tr, y_tr)

# Evaluate on the held-out split; the accuracy is the cell's output
y_pred_val = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_pred_val)
val_accuracy


In [None]:
# Run the test set through the same fitted preprocessing steps as the
# training data: imputation -> statistical features -> scaling.
X_test_imputed = imputer.transform(test_df[ndvi_columns])
X_test_features = add_statistical_features(X_test_imputed, ndvi_columns)
X_test_scaled = scaler.transform(X_test_features)

# Predict encoded classes, then map them back to the original label values
y_test_pred_encoded = model.predict(X_test_scaled)
y_test_pred_labels = le.inverse_transform(y_test_pred_encoded)

# Submission table: the test IDs alongside the predicted class.
# ('class' is a Python keyword, hence the dict-unpacking form of assign.)
submission_df = test_df[['ID']].assign(**{'class': y_test_pred_labels})

# Write the submission file without the index column
submission_path = 'data.csv'
submission_df.to_csv(submission_path, index=False)

# Preview the first rows as the cell output
submission_df.head()
