In [1]:
import pandas as pd
import numpy as np

# Define the sample size
sample_size = 100

# Generate random data for the features
data = {
    'Student_ID': np.arange(1, sample_size + 1),
    'Age': np.random.randint(18, 25, size=sample_size),
    'Gender': np.random.choice(['Male', 'Female'], size=sample_size),
    'Study_Hours_per_Week': np.random.randint(5, 40, size=sample_size),
    'Previous_Scores': np.random.randint(50, 100, size=sample_size),
    'Class_Participation': np.random.randint(50, 100, size=sample_size),
    'Attendance_Rate': np.random.randint(50, 100, size=sample_size),
    'Parental_Education_Level': np.random.choice(['High School', 'Undergraduate', 'Graduate', 'Postgraduate'], size=sample_size),
    'Extra_Curricular_Activities': np.random.choice(['Yes', 'No'], size=sample_size),
    'Internet_Access_at_Home': np.random.choice(['Yes', 'No'], size=sample_size),
    'Socioeconomic_Status': np.random.choice(['Low', 'Medium', 'High'], size=sample_size),
    'Health_Issues': np.random.choice(['Yes', 'No'], size=sample_size),
    'Motivation_Level': np.random.choice(['Low', 'Medium', 'High'], size=sample_size),
    'Peer_Influence': np.random.choice(['Positive', 'Negative', 'Neutral'], size=sample_size),
    'Time_Management_Skills': np.random.choice(['Poor', 'Average', 'Good'], size=sample_size),
    'Family_Support': np.random.choice(['Low', 'Medium', 'High'], size=sample_size),
    'Stress_Level': np.random.choice(['Low', 'Medium', 'High'], size=sample_size),
    'Learning_Style': np.random.choice(['Visual', 'Auditory', 'Kinesthetic'], size=sample_size),
    'Previous_GPA': np.random.choice(['1.0 - 2.0', '2.1 - 2.5', '2.6 - 3.0', '3.0 - 3.5', '3.5 - 4.0', '4.1 - 4.5', '4.6 - 5.0'], size=sample_size)
}

# Create a DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to CSV
df.to_csv('student_performance_dataset.csv', index=False)

print("CSV file 'student_performance_dataset.csv' has been created.")


CSV file 'student_performance_dataset.csv' has been created.


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Generate sample dataset
sample_size = 100

# Generate random data for the features
data = {
    'Student_ID': np.arange(1, sample_size + 1),
    'Age': np.random.randint(18, 25, size=sample_size),
    'Gender': np.random.choice(['Male', 'Female'], size=sample_size),
    'Study_Hours_per_Week': np.random.randint(5, 40, size=sample_size),
    'Previous_Scores': np.random.randint(50, 100, size=sample_size),
    'Class_Participation': np.random.randint(50, 100, size=sample_size),
    'Attendance_Rate': np.random.randint(50, 100, size=sample_size),
    'Parental_Education_Level': np.random.choice(['High School', 'Undergraduate', 'Graduate', 'Postgraduate'], size=sample_size),
    'Extra_Curricular_Activities': np.random.choice(['Yes', 'No'], size=sample_size),
    'Internet_Access_at_Home': np.random.choice(['Yes', 'No'], size=sample_size),
    'Socioeconomic_Status': np.random.choice(['Low', 'Medium', 'High'], size=sample_size),
    'Health_Issues': np.random.choice(['Yes', 'No'], size=sample_size),
    'Motivation_Level': np.random.choice(['Low', 'Medium', 'High'], size=sample_size),
    'Peer_Influence': np.random.choice(['Positive', 'Negative', 'Neutral'], size=sample_size),
    'Time_Management_Skills': np.random.choice(['Poor', 'Average', 'Good'], size=sample_size),
    'Family_Support': np.random.choice(['Low', 'Medium', 'High'], size=sample_size),
    'Stress_Level': np.random.choice(['Low', 'Medium', 'High'], size=sample_size),
    'Learning_Style': np.random.choice(['Visual', 'Auditory', 'Kinesthetic'], size=sample_size),
    'Previous_GPA': np.random.choice(['1.0 - 2.0', '2.1 - 2.5', '2.6 - 3.0', '3.0 - 3.5', '3.5 - 4.0', '4.1 - 4.5', '4.6 - 5.0'], size=sample_size)
}

# Create DataFrame
df = pd.read_csv('student_performance_dataset.csv')

# Step 2: Encode categorical features
label_encoders = {}
categorical_columns = df.select_dtypes(include=['object']).columns.tolist()
categorical_columns.remove('Student_ID')  # Drop Student_ID from categorical columns

# Initialize LabelEncoder
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Step 3: Split the dataset into features and target
# Create a target variable for demonstration (for example, predicting performance based on GPA)
df['Final_Performance'] = pd.cut(df['Previous_Scores'], bins=[0, 60, 70, 80, 90, 100], labels=['Poor', 'Average', 'Good', 'Very Good', 'Excellent'])

# Encode target variable
le_target = LabelEncoder()
df['Final_Performance'] = le_target.fit_transform(df['Final_Performance'])

X = df.drop(columns=["Final_Performance", "Student_ID"])  # Features
y = df["Final_Performance"]  # Target variable

# Step 4: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 5: Standardize numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 6: Train Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 7: Evaluate model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Step 8: Example Prediction
example_input = {
    "Age": 22,
    "Gender": "Male",
    "Study_Hours_per_Week": 30,
    "Previous_Scores": 85,
    "Class_Participation": 80,
    "Attendance_Rate": 90,
    "Parental_Education_Level": "Undergraduate",
    "Extra_Curricular_Activities": "Yes",
    "Internet_Access_at_Home": "Yes",
    "Socioeconomic_Status": "Medium",
    "Health_Issues": "No",
    "Motivation_Level": "High",
    "Peer_Influence": "Positive",
    "Time_Management_Skills": "Good",
    "Family_Support": "Strong",
    "Stress_Level": "Low",
    "Learning_Style": "Visual",
    "Previous_GPA": "3.5 - 4.0"
}

# Transform the input data
example_df = pd.DataFrame([example_input])
for col in categorical_columns:
    le = label_encoders[col]
    example_df[col] = le.transform(example_df[col])

example_df_scaled = scaler.transform(example_df)

# Predict performance
predicted_performance = model.predict(example_df_scaled)
predicted_performance_label = le_target.inverse_transform(predicted_performance)

print(f"Predicted Performance: {predicted_performance_label[0]}")


ValueError: list.remove(x): x not in list

In [1]:
food = "Yam"
fruit = "Orange"
if fruit == food:
    print("Yes")
else:
    print("No")

No


In [2]:
age = 18
if age >=18:
    print("You are Eligible to vote")

You are Eligible to vote


In [4]:
age = 16

if age >= 18:
    print("You are eligible to vote.")
elif age >= 16:
    print("You are almost eligible to vote.")


You are almost eligible to vote.


In [5]:
age = 12

if age >= 18:
    print("You are eligible to vote.")
elif age >= 16:
    print("You are almost eligible to vote.")
else:
    print("You are too young to vote.")


You are too young to vote.


In [6]:
score = 85

if score >= 90:
    print("Grade: A")
elif score >= 80:
    print("Grade: B")
elif score >= 70:
    print("Grade: C")
else:
    print("Grade: F")


Grade: B
