# Machine Learning Practice Tasks (Regression)

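This notebook contains six single-feature linear regression practice tasks. Each task builds a small hand-made dataset, fits a `LinearRegression` model, predicts a value for a new input, reports evaluation metrics, and plots the fitted line.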

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import seaborn as sns

# Plot configuration: reset matplotlib to its built-in default style and make
# seaborn's "husl" palette the default color cycle.
# NOTE: every plotting call visible in this notebook passes an explicit
# color=, so the palette only matters for plots that rely on the color cycle.
plt.style.use('default')
sns.set_palette("husl")

## Task 1: Experience vs Salary Prediction
**Objective**: Predict salary based on years of experience.

**Steps**:
1. Create a dataset manually with columns: Experience and Salary (at least 10 rows)
2. Train a Linear Regression model
3. Predict salary for a new experience value (e.g., 5 years)
4. Print evaluation metrics: Mean Squared Error (MSE), R² Score
5. Plot a graph with Experience vs Salary (scatter + regression line)

In [None]:
# Task 1: Experience vs Salary Prediction
#
# Builds a small hand-made dataset, fits a one-feature linear regression on a
# train split, scores it on the held-out split, predicts a new value, and
# plots the data together with the fitted line.

# 1. Create dataset manually
experience_data = {
    'Experience': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'Salary': [30000, 35000, 42000, 48000, 55000, 62000, 68000, 75000, 82000, 90000, 95000, 105000]
}

df_salary = pd.DataFrame(experience_data)
print("Dataset:")
print(df_salary)
print(f"\nDataset shape: {df_salary.shape}")

# 2. Prepare data and train Linear Regression model
# Double brackets keep X as a one-column DataFrame (2-D), so the model is
# fitted with the feature name 'Experience'.
X = df_salary[['Experience']]
y = df_salary['Salary']

# Split the data (fixed random_state keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
lr_salary = LinearRegression()
lr_salary.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = lr_salary.predict(X_test)

print(f"\nModel coefficients:")
print(f"Slope: {lr_salary.coef_[0]:.2f}")
print(f"Intercept: {lr_salary.intercept_:.2f}")

# 3. Predict salary for 5 years experience
# The query is wrapped in a DataFrame with the training column name: passing a
# bare list to a model fitted on a DataFrame makes scikit-learn warn that
# "X does not have valid feature names".
query_experience = 5  # single source of truth for the print and the plot
new_experience = pd.DataFrame({'Experience': [query_experience]})
predicted_salary = lr_salary.predict(new_experience)
print(f"\nPredicted salary for {query_experience} years experience: ${predicted_salary[0]:.2f}")

# 4. Print evaluation metrics (computed on the test split only)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"\nEvaluation Metrics:")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R² Score: {r2:.4f}")

# 5. Plot graph with scatter points and regression line
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_train['Experience'], y_train, color='blue', alpha=0.6, label='Training Data')
ax.scatter(X_test['Experience'], y_test, color='red', alpha=0.8, label='Test Data')

# Create regression line: evaluate the model on an evenly spaced grid between
# the smallest and largest observed experience (scalar bounds, named column).
X_range = pd.DataFrame({
    'Experience': np.linspace(df_salary['Experience'].min(), df_salary['Experience'].max(), 100)
})
y_range_pred = lr_salary.predict(X_range)
ax.plot(X_range['Experience'], y_range_pred, color='green', linewidth=2, label='Regression Line')

# Highlight the prediction
ax.scatter(query_experience, predicted_salary[0], color='orange', s=100, marker='*',
           label=f'Prediction ({query_experience} years: ${predicted_salary[0]:.0f})')

ax.set_xlabel('Experience (Years)')
ax.set_ylabel('Salary ($)')
ax.set_title('Task 1: Experience vs Salary Prediction')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## Task 2: House Price Prediction
**Objective**: Predict house prices using area (in sq. feet).

**Steps**:
1. Create a dataset with columns: Area and Price
2. Train a Linear Regression model
3. Predict price for a new area value (e.g., 2500 sq.ft)
4. Print evaluation metrics: Mean Absolute Error (MAE), Mean Squared Error (MSE)
5. Plot a graph with Area vs Price (scatter + regression line)

In [None]:
# Task 2: House Price Prediction
#
# Fits a one-feature linear regression of Price on Area, reports MAE and MSE
# on a held-out split, predicts a new value, and plots the fitted line.

# 1. Create dataset with Area and Price
house_data = {
    'Area': [1200, 1500, 1800, 2000, 2200, 2500, 2800, 3000, 3200, 3500, 3800, 4000],
    'Price': [180000, 220000, 260000, 290000, 320000, 360000, 400000, 430000, 460000, 500000, 540000, 580000]
}

df_house = pd.DataFrame(house_data)
print("House Dataset:")
print(df_house)
print(f"\nDataset shape: {df_house.shape}")

# 2. Prepare data and train Linear Regression model
# Double brackets keep X_house as a one-column DataFrame (2-D), so the model
# is fitted with the feature name 'Area'.
X_house = df_house[['Area']]
y_house = df_house['Price']

# Split the data (fixed random_state keeps the split reproducible)
X_train_house, X_test_house, y_train_house, y_test_house = train_test_split(X_house, y_house, test_size=0.25, random_state=42)

# Train the model
lr_house = LinearRegression()
lr_house.fit(X_train_house, y_train_house)

# Make predictions on the held-out rows
y_pred_house = lr_house.predict(X_test_house)

print(f"\nModel coefficients:")
print(f"Slope: {lr_house.coef_[0]:.2f}")
print(f"Intercept: {lr_house.intercept_:.2f}")

# 3. Predict price for 2500 sq.ft area
# The query is wrapped in a DataFrame with the training column name: passing a
# bare list to a model fitted on a DataFrame makes scikit-learn warn that
# "X does not have valid feature names".
query_area = 2500  # single source of truth for the print and the plot
new_area = pd.DataFrame({'Area': [query_area]})
predicted_price = lr_house.predict(new_area)
print(f"\nPredicted price for {query_area} sq.ft: ${predicted_price[0]:.2f}")

# 4. Print evaluation metrics (computed on the test split only)
mae_house = mean_absolute_error(y_test_house, y_pred_house)
mse_house = mean_squared_error(y_test_house, y_pred_house)

print(f"\nEvaluation Metrics:")
print(f"Mean Absolute Error (MAE): ${mae_house:.2f}")
# MSE is in squared price units, so it is printed without a "$" prefix
# (MAE above is in the same units as Price and keeps it).
print(f"Mean Squared Error (MSE): {mse_house:.2f}")

# 5. Plot graph with Area vs Price
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_train_house['Area'], y_train_house, color='blue', alpha=0.6, label='Training Data')
ax.scatter(X_test_house['Area'], y_test_house, color='red', alpha=0.8, label='Test Data')

# Create regression line: evaluate the model on an evenly spaced grid between
# the smallest and largest observed area (scalar bounds, named column).
X_house_range = pd.DataFrame({
    'Area': np.linspace(df_house['Area'].min(), df_house['Area'].max(), 100)
})
y_house_range_pred = lr_house.predict(X_house_range)
ax.plot(X_house_range['Area'], y_house_range_pred, color='green', linewidth=2, label='Regression Line')

# Highlight the prediction
ax.scatter(query_area, predicted_price[0], color='orange', s=100, marker='*',
           label=f'Prediction ({query_area} sq.ft: ${predicted_price[0]:.0f})')

ax.set_xlabel('Area (sq.ft)')
ax.set_ylabel('Price ($)')
ax.set_title('Task 2: House Price Prediction')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## Task 3: Study Hours vs Exam Score
**Objective**: Predict student exam scores based on study hours.

**Steps**:
1. Create a dataset with columns: Hours Studied and Exam Score
2. Train a Linear Regression model
3. Predict exam score for a new input (e.g., 8 hours)
4. Print evaluation metrics: Root Mean Squared Error (RMSE), R² Score
5. Plot a graph with Hours vs Score (scatter + regression line)

In [None]:
# Task 3: Study Hours vs Exam Score
#
# Fits a one-feature linear regression of Exam_Score on Hours_Studied, reports
# RMSE and R² on a held-out split, predicts a new value, and plots the fit.

# 1. Create dataset with Hours Studied and Exam Score
study_data = {
    'Hours_Studied': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
    'Exam_Score': [45, 52, 58, 65, 70, 75, 80, 85, 88, 90, 92, 94, 96, 98]
}

df_study = pd.DataFrame(study_data)
print("Study Dataset:")
print(df_study)
print(f"\nDataset shape: {df_study.shape}")

# 2. Prepare data and train Linear Regression model
# Double brackets keep X_study as a one-column DataFrame (2-D), so the model
# is fitted with the feature name 'Hours_Studied'.
X_study = df_study[['Hours_Studied']]
y_study = df_study['Exam_Score']

# Split the data (fixed random_state keeps the split reproducible)
X_train_study, X_test_study, y_train_study, y_test_study = train_test_split(X_study, y_study, test_size=0.25, random_state=42)

# Train the model
lr_study = LinearRegression()
lr_study.fit(X_train_study, y_train_study)

# Make predictions on the held-out rows
y_pred_study = lr_study.predict(X_test_study)

print(f"\nModel coefficients:")
print(f"Slope: {lr_study.coef_[0]:.2f}")
print(f"Intercept: {lr_study.intercept_:.2f}")

# 3. Predict exam score for 8 hours of study
# The query is wrapped in a DataFrame with the training column name: passing a
# bare list to a model fitted on a DataFrame makes scikit-learn warn that
# "X does not have valid feature names".
query_hours = 8  # single source of truth for the print and the plot
new_hours = pd.DataFrame({'Hours_Studied': [query_hours]})
predicted_score = lr_study.predict(new_hours)
print(f"\nPredicted exam score for {query_hours} hours of study: {predicted_score[0]:.2f}")

# 4. Print evaluation metrics (computed on the test split only)
# RMSE is the square root of MSE, which puts the error back in score units.
rmse_study = np.sqrt(mean_squared_error(y_test_study, y_pred_study))
r2_study = r2_score(y_test_study, y_pred_study)

print(f"\nEvaluation Metrics:")
print(f"Root Mean Squared Error (RMSE): {rmse_study:.2f}")
print(f"R² Score: {r2_study:.4f}")

# 5. Plot graph with Hours vs Score
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_train_study['Hours_Studied'], y_train_study, color='blue', alpha=0.6, label='Training Data')
ax.scatter(X_test_study['Hours_Studied'], y_test_study, color='red', alpha=0.8, label='Test Data')

# Create regression line: evaluate the model on an evenly spaced grid between
# the smallest and largest observed study time (scalar bounds, named column).
X_study_range = pd.DataFrame({
    'Hours_Studied': np.linspace(df_study['Hours_Studied'].min(), df_study['Hours_Studied'].max(), 100)
})
y_study_range_pred = lr_study.predict(X_study_range)
ax.plot(X_study_range['Hours_Studied'], y_study_range_pred, color='green', linewidth=2, label='Regression Line')

# Highlight the prediction
ax.scatter(query_hours, predicted_score[0], color='orange', s=100, marker='*',
           label=f'Prediction ({query_hours} hours: {predicted_score[0]:.1f})')

ax.set_xlabel('Hours Studied')
ax.set_ylabel('Exam Score')
ax.set_title('Task 3: Study Hours vs Exam Score')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## Task 4: Temperature vs Ice Cream Sales
**Objective**: Predict ice cream sales based on temperature.

**Steps**:
1. Create a dataset with columns: Temperature (°C) and Sales (in Rs)
2. Train a Linear Regression model
3. Predict sales for a new temperature (e.g., 35°C)
4. Print evaluation metrics: Mean Squared Error (MSE), R² Score
5. Plot a graph with Temperature vs Sales (scatter + regression line)

In [None]:
# Task 4: Temperature vs Ice Cream Sales
#
# Fits a one-feature linear regression of Sales on Temperature, reports MSE
# and R² on a held-out split, predicts a new value, and plots the fit.

# 1. Create dataset with Temperature and Sales
temp_data = {
    'Temperature': [15, 18, 20, 22, 25, 28, 30, 32, 35, 38, 40, 42],
    'Sales': [1200, 1400, 1600, 1800, 2200, 2600, 3000, 3400, 3800, 4200, 4600, 5000]
}

df_temp = pd.DataFrame(temp_data)
print("Temperature vs Sales Dataset:")
print(df_temp)
print(f"\nDataset shape: {df_temp.shape}")

# 2. Prepare data and train Linear Regression model
# Double brackets keep X_temp as a one-column DataFrame (2-D), so the model
# is fitted with the feature name 'Temperature'.
X_temp = df_temp[['Temperature']]
y_temp = df_temp['Sales']

# Split the data (fixed random_state keeps the split reproducible)
X_train_temp, X_test_temp, y_train_temp, y_test_temp = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)

# Train the model
lr_temp = LinearRegression()
lr_temp.fit(X_train_temp, y_train_temp)

# Make predictions on the held-out rows
y_pred_temp = lr_temp.predict(X_test_temp)

print(f"\nModel coefficients:")
print(f"Slope: {lr_temp.coef_[0]:.2f}")
print(f"Intercept: {lr_temp.intercept_:.2f}")

# 3. Predict sales for 35°C temperature
# The query is wrapped in a DataFrame with the training column name: passing a
# bare list to a model fitted on a DataFrame makes scikit-learn warn that
# "X does not have valid feature names".
query_temp = 35  # single source of truth for the print and the plot
new_temp = pd.DataFrame({'Temperature': [query_temp]})
predicted_sales = lr_temp.predict(new_temp)
print(f"\nPredicted sales for {query_temp}°C: Rs. {predicted_sales[0]:.2f}")

# 4. Print evaluation metrics (computed on the test split only)
mse_temp = mean_squared_error(y_test_temp, y_pred_temp)
r2_temp = r2_score(y_test_temp, y_pred_temp)

print(f"\nEvaluation Metrics:")
print(f"Mean Squared Error (MSE): {mse_temp:.2f}")
print(f"R² Score: {r2_temp:.4f}")

# 5. Plot graph with Temperature vs Sales
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_train_temp['Temperature'], y_train_temp, color='blue', alpha=0.6, label='Training Data')
ax.scatter(X_test_temp['Temperature'], y_test_temp, color='red', alpha=0.8, label='Test Data')

# Create regression line: evaluate the model on an evenly spaced grid between
# the smallest and largest observed temperature (scalar bounds, named column).
X_temp_range = pd.DataFrame({
    'Temperature': np.linspace(df_temp['Temperature'].min(), df_temp['Temperature'].max(), 100)
})
y_temp_range_pred = lr_temp.predict(X_temp_range)
ax.plot(X_temp_range['Temperature'], y_temp_range_pred, color='green', linewidth=2, label='Regression Line')

# Highlight the prediction
ax.scatter(query_temp, predicted_sales[0], color='orange', s=100, marker='*',
           label=f'Prediction ({query_temp}°C: Rs. {predicted_sales[0]:.0f})')

ax.set_xlabel('Temperature (°C)')
ax.set_ylabel('Sales (Rs.)')
ax.set_title('Task 4: Temperature vs Ice Cream Sales')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## Task 5: Car Age vs Resale Value
**Objective**: Predict car resale value based on its age.

**Steps**:
1. Create a dataset with columns: Car Age (years) and Resale Value (in Rs)
2. Train a Linear Regression model
3. Predict resale value for a new car age (e.g., 6 years)
4. Print evaluation metrics: Mean Absolute Error (MAE), R² Score
5. Plot a graph with Car Age vs Resale Value (scatter + regression line)

In [None]:
# Task 5: Car Age vs Resale Value
#
# Fits a one-feature linear regression of Resale_Value on Car_Age, reports MAE
# and R² on a held-out split, predicts a new value, and plots the fit.

# 1. Create dataset with Car Age and Resale Value
car_data = {
    'Car_Age': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'Resale_Value': [950000, 850000, 750000, 680000, 620000, 560000, 500000, 450000, 400000, 350000, 300000, 250000]
}

df_car = pd.DataFrame(car_data)
print("Car Age vs Resale Value Dataset:")
print(df_car)
print(f"\nDataset shape: {df_car.shape}")

# 2. Prepare data and train Linear Regression model
# Double brackets keep X_car as a one-column DataFrame (2-D), so the model
# is fitted with the feature name 'Car_Age'.
X_car = df_car[['Car_Age']]
y_car = df_car['Resale_Value']

# Split the data (fixed random_state keeps the split reproducible)
X_train_car, X_test_car, y_train_car, y_test_car = train_test_split(X_car, y_car, test_size=0.25, random_state=42)

# Train the model
lr_car = LinearRegression()
lr_car.fit(X_train_car, y_train_car)

# Make predictions on the held-out rows
y_pred_car = lr_car.predict(X_test_car)

print(f"\nModel coefficients:")
print(f"Slope: {lr_car.coef_[0]:.2f}")
print(f"Intercept: {lr_car.intercept_:.2f}")

# 3. Predict resale value for 6 years old car
# The query is wrapped in a DataFrame with the training column name: passing a
# bare list to a model fitted on a DataFrame makes scikit-learn warn that
# "X does not have valid feature names".
query_age = 6  # single source of truth for the print and the plot
new_car_age = pd.DataFrame({'Car_Age': [query_age]})
predicted_resale = lr_car.predict(new_car_age)
print(f"\nPredicted resale value for {query_age} years old car: Rs. {predicted_resale[0]:.2f}")

# 4. Print evaluation metrics (computed on the test split only)
mae_car = mean_absolute_error(y_test_car, y_pred_car)
r2_car = r2_score(y_test_car, y_pred_car)

print(f"\nEvaluation Metrics:")
print(f"Mean Absolute Error (MAE): Rs. {mae_car:.2f}")
print(f"R² Score: {r2_car:.4f}")

# 5. Plot graph with Car Age vs Resale Value
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_train_car['Car_Age'], y_train_car, color='blue', alpha=0.6, label='Training Data')
ax.scatter(X_test_car['Car_Age'], y_test_car, color='red', alpha=0.8, label='Test Data')

# Create regression line: evaluate the model on an evenly spaced grid between
# the smallest and largest observed car age (scalar bounds, named column).
X_car_range = pd.DataFrame({
    'Car_Age': np.linspace(df_car['Car_Age'].min(), df_car['Car_Age'].max(), 100)
})
y_car_range_pred = lr_car.predict(X_car_range)
ax.plot(X_car_range['Car_Age'], y_car_range_pred, color='green', linewidth=2, label='Regression Line')

# Highlight the prediction
ax.scatter(query_age, predicted_resale[0], color='orange', s=100, marker='*',
           label=f'Prediction ({query_age} years: Rs. {predicted_resale[0]:.0f})')

ax.set_xlabel('Car Age (Years)')
ax.set_ylabel('Resale Value (Rs.)')
ax.set_title('Task 5: Car Age vs Resale Value')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## Task 6: Exercise Duration vs Calories Burned
**Objective**: Predict calories burned based on minutes of exercise.

**Steps**:
1. Create a dataset with columns: Exercise Duration (minutes) and Calories Burned
2. Train a Linear Regression model
3. Predict calories for a new duration (e.g., 60 minutes)
4. Print evaluation metrics: Mean Squared Error (MSE), Root Mean Squared Error (RMSE)
5. Plot a graph with Duration vs Calories (scatter + regression line)

In [None]:
# Task 6: Exercise Duration vs Calories Burned
#
# Fits a one-feature linear regression of Calories_Burned on
# Exercise_Duration, reports MSE and RMSE on a held-out split, predicts a new
# value, and plots the fit.

# 1. Create dataset with Exercise Duration and Calories Burned
exercise_data = {
    'Exercise_Duration': [10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90],
    'Calories_Burned': [50, 75, 100, 125, 150, 175, 200, 225, 250, 300, 350, 400, 450]
}

df_exercise = pd.DataFrame(exercise_data)
print("Exercise Duration vs Calories Burned Dataset:")
print(df_exercise)
print(f"\nDataset shape: {df_exercise.shape}")

# 2. Prepare data and train Linear Regression model
# Double brackets keep X_exercise as a one-column DataFrame (2-D), so the
# model is fitted with the feature name 'Exercise_Duration'.
X_exercise = df_exercise[['Exercise_Duration']]
y_exercise = df_exercise['Calories_Burned']

# Split the data (fixed random_state keeps the split reproducible)
X_train_exercise, X_test_exercise, y_train_exercise, y_test_exercise = train_test_split(X_exercise, y_exercise, test_size=0.25, random_state=42)

# Train the model
lr_exercise = LinearRegression()
lr_exercise.fit(X_train_exercise, y_train_exercise)

# Make predictions on the held-out rows
y_pred_exercise = lr_exercise.predict(X_test_exercise)

print(f"\nModel coefficients:")
print(f"Slope: {lr_exercise.coef_[0]:.2f}")
print(f"Intercept: {lr_exercise.intercept_:.2f}")

# 3. Predict calories burned for 60 minutes of exercise
# The query is wrapped in a DataFrame with the training column name: passing a
# bare list to a model fitted on a DataFrame makes scikit-learn warn that
# "X does not have valid feature names".
query_minutes = 60  # single source of truth for the print and the plot
new_duration = pd.DataFrame({'Exercise_Duration': [query_minutes]})
predicted_calories = lr_exercise.predict(new_duration)
print(f"\nPredicted calories burned for {query_minutes} minutes: {predicted_calories[0]:.2f}")

# 4. Print evaluation metrics (computed on the test split only)
mse_exercise = mean_squared_error(y_test_exercise, y_pred_exercise)
# RMSE is the square root of MSE, which puts the error back in calorie units.
rmse_exercise = np.sqrt(mse_exercise)

print(f"\nEvaluation Metrics:")
print(f"Mean Squared Error (MSE): {mse_exercise:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse_exercise:.2f}")

# 5. Plot graph with Duration vs Calories
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_train_exercise['Exercise_Duration'], y_train_exercise, color='blue', alpha=0.6, label='Training Data')
ax.scatter(X_test_exercise['Exercise_Duration'], y_test_exercise, color='red', alpha=0.8, label='Test Data')

# Create regression line: evaluate the model on an evenly spaced grid between
# the shortest and longest observed duration (scalar bounds, named column).
X_exercise_range = pd.DataFrame({
    'Exercise_Duration': np.linspace(
        df_exercise['Exercise_Duration'].min(), df_exercise['Exercise_Duration'].max(), 100
    )
})
y_exercise_range_pred = lr_exercise.predict(X_exercise_range)
ax.plot(X_exercise_range['Exercise_Duration'], y_exercise_range_pred, color='green', linewidth=2, label='Regression Line')

# Highlight the prediction
ax.scatter(query_minutes, predicted_calories[0], color='orange', s=100, marker='*',
           label=f'Prediction ({query_minutes} min: {predicted_calories[0]:.0f} calories)')

ax.set_xlabel('Exercise Duration (Minutes)')
ax.set_ylabel('Calories Burned')
ax.set_title('Task 6: Exercise Duration vs Calories Burned')
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

## Summary

All 6 regression tasks have been completed successfully! Each task demonstrates:

1. **Data Creation**: Manual dataset creation with relevant features
2. **Model Training**: Linear Regression implementation
3. **Predictions**: Making predictions for new input values
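4. **Evaluation**: Computing various metrics (MSE, MAE, RMSE, R²) on a held-out test split. Each test split here contains only 3–4 rows, so treat the scores as illustrative rather than as reliable estimates of model quality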
5. **Visualization**: Scatter plots with regression lines
