Notes:

01. The training dataset is for making the submission models.
02. Each team may make at most ten submissions in the competition.
03. We can make practice submissions to learn the Kaggle setup.
04. Kaggle evaluates only the best submission for each team.
05. The main goal is to participate, to practice & to perform as well as possible.
06. The best participants in the kaggle competition get bonus grades.
07. We can write the code anywhere and in any way we like.
08. The final code should be in the shared kaggle notebook.
09. The final submission should be through the team section in the competition (don't make the mistake of submitting individually).

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import numpy as np # linear algebra

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import warnings

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# loading + preprocessing

# Read the competition training table into a DataFrame.
df = pd.read_csv('/kaggle/input/mental-health-prediction-hackathon/train.csv')
# head must be *called*; a bare `df.head` only shows the bound-method repr
# instead of the first rows.
df.head()

In [None]:
# Print column names, non-null counts and dtypes. info must be called;
# a bare `df.info` only shows the bound-method repr.
df.info()

In [None]:
# Count plots of every column, restricted to the rows labelled Depression == 1.
warnings.simplefilter(action='ignore', category=UserWarning)  # Suppress warnings

# Take an explicit copy so nothing below operates on a slice of `df`
# (avoids chained-assignment warnings and any ambiguity about views).
df_depression_1 = df[df['Depression'] == 1].copy()
columns_to_plot = [col for col in df.columns if col != 'Depression']  # every column except the target
num_columns = len(columns_to_plot)
# Grid layout: one subplot row per column, a single subplot column,
# 5 inches of figure height per subplot.
plt.figure(figsize=(15, num_columns * 5))

for i, column in enumerate(columns_to_plot, 1):
    plt.subplot(num_columns, 1, i)
    values = df_depression_1[column]
    if values.dtype == 'float64':
        # Round float columns to whole numbers so the x axis shows a small set
        # of integer bins. The rounded series is plotted directly instead of
        # being assigned back: assigning a NaN-dropped series back re-aligns it
        # on the index, which reintroduces the NaNs and the float dtype.
        values = values.dropna().round().astype(int)
    # Plot the count of each unique value in the column (categorical or numeric)
    sns.countplot(x=values)
    plt.title(f'{column} vs Depression (Depression == 1)')
    plt.xticks(rotation=90)

# Adjust layout to avoid overlap
plt.tight_layout()
plt.show()

In [None]:
# training preparation

# Pick out the string-typed (object) columns first, then overwrite each one
# with the integer codes of its pandas Categorical representation.
text_columns = [name for name in df.columns if df[name].dtype == 'object']
for name in text_columns:
    df[name] = pd.Categorical(df[name]).codes

# Confirm the resulting dtypes and non-null counts.
df.info()

In [None]:
# Preview the first five rows of the encoded training frame.
df.head(n=5)

In [None]:
# Replace every remaining missing value with 0, modifying df in place.
df.fillna(value=0, inplace=True)

In [None]:
# Build the model inputs: X is every column except the ones listed below,
# Y is the 'Depression' label column.
excluded_columns = [
    'id',
    'Name',
    'Academic Pressure',
    'CGPA',
    'Study Satisfaction',
    'Depression',
]
Y = df['Depression']
X = df.drop(columns=excluded_columns)

# No train/validation split is made; the model is fitted on all rows.


In [None]:
# Fit a logistic-regression classifier with default settings on all training rows.
# fit() returns the estimator itself, so construction and fitting can be chained.
log_reg = LogisticRegression().fit(X, Y)
log_reg

In [None]:
# Predict on the same rows the model was fitted on. The score below is
# therefore training accuracy, not an estimate of performance on unseen data.
y_pred = log_reg.predict(X)
# accuracy_score's documented signature is (y_true, y_pred); pass the labels first.
accuracy = accuracy_score(Y, y_pred)
print("Accuracy: ", accuracy)

In [None]:
# Read the competition's test.csv and preview its first rows.
test_csv_path = '/kaggle/input/mental-health-prediction-hackathon/test.csv'
test_df = pd.read_csv(test_csv_path)
test_df.head()

In [None]:
# Show the (rows, columns) dimensions of the test frame.
test_df.shape


In [None]:
# Apply the same preprocessing steps as the training data.
#
# Encoding the test strings on their own would number the categories from the
# values that happen to occur in test.csv, so a given string could receive a
# different integer than the one the model saw during training. The category
# lists are therefore rebuilt from the raw training file and reused here.
train_raw = pd.read_csv('/kaggle/input/mental-health-prediction-hackathon/train.csv')

for feature in test_df.columns:
    if feature == 'id':
        # Row identifier, not a model feature; it is written to the submission
        # file unchanged, so it must never be replaced by category codes.
        continue
    if test_df[feature].dtype != 'object':
        continue
    if feature in train_raw.columns and train_raw[feature].dtype == 'object':
        train_categories = pd.Categorical(train_raw[feature]).categories
        # Values absent from the training categories are coded -1, the same
        # code pandas assigns to missing values.
        test_df[feature] = pd.Categorical(
            test_df[feature], categories=train_categories
        ).codes
    else:
        # No string column of this name in train: fall back to a standalone encoding.
        test_df[feature] = pd.Categorical(test_df[feature]).codes

# Replace the remaining missing (numeric) values with 0, as done for the training frame.
test_df.fillna(0, inplace=True)


In [None]:

# Select from the test frame the feature columns it shares with the training
# matrix X, then let the fitted model label each test row.
shared_features = X.columns.intersection(test_df.columns)
X_submission = test_df[shared_features].copy()
test_predictions = log_reg.predict(X_submission)

In [None]:
# Assemble the two-column submission table: the test ids alongside the
# predicted 'Depression' label for each row.
submission = test_df[['id']].assign(Depression=test_predictions)

# Write it to the working directory without the DataFrame index column.
submission_path = 'team_technoids-logistic_regression_test_predictions.csv'
submission.to_csv(submission_path, index=False)
print("Result output submission file created successfully!")
