In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Load dataset
file_path = 'job_placement.csv'  # Replace with your actual file path
data = pd.read_csv(file_path)

# Selecting relevant columns
features = ['gender', 'age', 'stream', 'college_name', 'gpa', 'years_of_experience']
target = 'placement_status'

# Encode categorical variables
encoder = LabelEncoder()
for column in ['gender', 'stream', 'college_name']:
    data[column] = encoder.fit_transform(data[column])

# Encode the target variable
data[target] = encoder.fit_transform(data[target])

# Split features and target
x = data[features]
y = data[target]

# Normalize numerical features for better performance
scaler = StandardScaler()
x[['age', 'gpa']] = scaler.fit_transform(x[['age', 'gpa']])

# Split dataset into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Create KNN model
knn = KNeighborsClassifier(n_neighbors=5)  # You can tune the value of 'n_neighbors'
knn.fit(x_train, y_train)

# Make predictions
y_pred = knn.predict(x_test)

# Evaluate model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

import matplotlib.pyplot as plt

# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred)

# Plot confusion matrix
plt.figure(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Not Placed', 'Placed'], yticklabels=['Not Placed', 'Placed'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

In [112]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
file_path = 'job_placement.csv'  # Replace with your actual file path
data = pd.read_csv(file_path)

# Check unique values in 'placement_status' column
print(data['placement_status'].unique())

# Filter data for placed candidates
placed_candidates = data[data['placement_status'] == 1]  # Assuming 1 represents 'Placed' after encoding

# Selecting relevant columns
features = ['gender', 'age', 'stream', 'college_name', 'years_of_experience', 'gpa']
target = 'salary'

# Encode categorical variables
encoder = LabelEncoder()
for column in ['gender', 'stream', 'college_name']:
    placed_candidates[column] = encoder.fit_transform(placed_candidates[column])

# Split features and target
X = placed_candidates[features]
y = placed_candidates[target]

# Normalize numerical features for better performance
scaler = StandardScaler()
X[['age', 'years_of_experience']] = scaler.fit_transform(X[['age', 'years_of_experience']])

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train linear regression model
lr = LinearRegression()
lr.fit(X_train, y_train)

# Make predictions
y_pred = lr.predict(X_test)
y_pred = y_pred.astype(int)

# Evaluate model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")
print(f"Root Mean Squared Error: {mse**0.5:.2f}")

[1 0]
Mean Squared Error: 3556664.37
R^2 Score: 0.30
Root Mean Squared Error: 1885.91


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  placed_candidates[column] = encoder.fit_transform(placed_candidates[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  placed_candidates[column] = encoder.fit_transform(placed_candidates[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  placed_candidates[column] = encoder.fit_transform(p

In [110]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load dataset
file_path = 'job_placement.csv'  # Replace with your actual file path
data = pd.read_csv(file_path)

# Filter data for placed candidates
placed_candidates = data[data['placement_status'] == 1]  # Assuming 1 represents 'Placed' after encoding

# Selecting relevant columns
features = ['gender', 'age', 'stream', 'college_name', 'years_of_experience']
target = 'salary'

# Encode categorical variables
encoder = LabelEncoder()
for column in ['gender', 'stream', 'college_name']:
    placed_candidates[column] = encoder.fit_transform(placed_candidates[column])

# Split features and target
X = placed_candidates[features]
y = placed_candidates[target]

# Normalize numerical features for better performance
scaler = StandardScaler()
X[['age', 'years_of_experience']] = scaler.fit_transform(X[['age', 'years_of_experience']])

# Polynomial Features (degree=2)
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_poly, y, test_size=0.2, random_state=42)

# Create and train linear regression model
lr = LinearRegression()
lr.fit(X_train, y_train)

# Make predictions
y_pred = lr.predict(X_test)
y_pred = np.maximum(y_pred, 0).astype(int)  # Ensure no negative salaries

# Evaluate model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R^2 Score: {r2:.2f}")

# Print predicted and real salaries side by side
comparison = pd.DataFrame({'Predicted Salary': y_pred, 'Real Salary': y_test})
print(comparison)

Mean Squared Error: 3982195.65
R^2 Score: 0.22
     Predicted Salary  Real Salary
622             62757        60000
88              65308        67000
164             63292        64000
492             65699        65000
664             66213        66000
..                ...          ...
247             66592        68000
94              63140        66000
308             66663        66000
518             63155        63000
443             66009        66000

[114 rows x 2 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  placed_candidates[column] = encoder.fit_transform(placed_candidates[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  placed_candidates[column] = encoder.fit_transform(placed_candidates[column])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  placed_candidates[column] = encoder.fit_transform(p

In [43]:
# Create a DataFrame with the predicted and real salary values
salary_comparison = pd.DataFrame({'Predicted Salary': y_pred, 'Real Salary': y_test})

# Export the DataFrame to a CSV file
salary_comparison.to_csv('salary_comparison.csv', index=False)