In [None]:
import getpass
import os

from supabase import create_client

# Connection settings are read from the environment so that the API key is not
# stored in the notebook itself. Set SUPABASE_URL / SUPABASE_KEY before
# starting the kernel; if SUPABASE_KEY is unset the key is prompted for
# interactively (the prompt does not echo the typed value).
# NOTE(review): the key that was previously hardcoded here is still present in
# the notebook's history — rotate it if it was not meant to be public.
url = os.environ.get("SUPABASE_URL", "https://kvbekcdyfhnpktlmcdvj.supabase.co")
key = os.environ.get("SUPABASE_KEY") or getpass.getpass("Supabase API key: ")

# Shared client used by the later cells to query the placements table.
supabase = create_client(url, key)


In [None]:
import pandas as pd

# Pull every row of the placements table through the shared Supabase client.
response = supabase.table("College-Placements-Data").select("*").execute()

# Build the working DataFrame; rows stay in the order Supabase returned them
# (no sort is applied). The "_id" column is discarded when present and its
# absence is tolerated.
data = pd.DataFrame(response.data).drop(columns="_id", errors="ignore")

# Last expression of the cell: rendered by Jupyter as a rich table.
data

In [None]:
data.head(7) # Display the first 7 rows of the DataFrame

In [None]:
# Count missing values in the two columns the forecasting step relies on.
null_counts = data[['Year', 'Placement Percentage']].isnull().sum()
print(null_counts)

# For each column, take the largest number of distinct values seen within any
# single college; a maximum of exactly 1 means the column never varies inside
# a college.
max_distinct_per_college = data.groupby('College Name').nunique().max()
constant_cols = max_distinct_per_college[max_distinct_per_college == 1].index.tolist()
print("Constant columns:", constant_cols)

In [None]:
# Normalise 'Year' to text, trim surrounding whitespace, and keep only the
# part after the last '-' (values without a '-' are left as they are).
year_text = data['Year'].astype(str).str.strip()
data['Year'] = year_text.str.split('-').str.get(-1)

In [None]:
# Preview the first five rows (head() defaults to n=5).
data.head()

In [None]:
# Server-side filter: fetch only the rows whose "College Name" equals
# "Christ University".
christ_query = (
    supabase.table("College-Placements-Data")
    .select("*")
    .eq("College Name", "Christ University")
)
result = christ_query.execute()

# Print each returned record on its own line.
for row in result.data:
    print(row)

Prediction

In [None]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt

def _forecast_next_two(series, label):
    """Forecast the next two values of a yearly series.

    Fits an ARIMA(1, 1, 0) model. If fitting or forecasting raises, a message
    is printed and a simple extrapolation is used instead: the slope between
    the last two observations, or, with a single observation, that value
    followed by a 1% increase.

    Args:
        series (pd.Series): Numeric values indexed by a sorted DatetimeIndex.
        label (str): Metric name used in the fallback message.

    Returns:
        tuple: (one-step-ahead value, two-steps-ahead value).
    """
    try:
        fitted = ARIMA(series, order=(1, 1, 0)).fit()
        # A single two-step forecast yields both horizons.
        forecast = fitted.forecast(steps=2)
        return forecast.iloc[0], forecast.iloc[1]
    except Exception as e:
        # Deliberately broad: any modelling failure degrades to the
        # trend-based estimate rather than aborting the whole prediction.
        print(f"Error fitting ARIMA for {label}: {e}. Using trend-based adjustment instead.")
        if len(series) >= 2:
            years = series.index.year
            trend = (series.iloc[-1] - series.iloc[-2]) / (years[-1] - years[-2])  # Slope per year
            first = series.iloc[-1] + trend
            return first, first + trend
        # Only one observation: no trend available.
        return series.iloc[-1], series.iloc[-1] * 1.01  # Small increase if no trend


def predict_placement_and_salary(data, college_name):
    """
    Predicts placement percentage and median salary for a given college using
    ARIMA time series modeling, then prints and plots the result.

    The two forecast years are the two years following the college's last
    observed year, so existing rows are never relabelled or overwritten.

    Args:
        data (pd.DataFrame): The input data. Must contain 'College Name',
            'Year' (parseable with format '%Y'), 'Placement Percentage' and
            'Median Salary (LPA)' (numeric, optionally suffixed with ' LPA').
        college_name (str): The name of the college to predict for
            (exact match against 'College Name').

    Returns:
        None. Predictions are printed and a two-panel matplotlib figure is
        shown. If the college has no rows, or a required column is missing,
        a message is printed and the function returns early.
    """
    placement_col = 'Placement Percentage'
    salary_col = 'Median Salary (LPA)'

    # 1. Filter data for the specified college (copy so the caller's frame is untouched)
    college_data = data[data['College Name'] == college_name].copy()

    if college_data.empty:
        print(f"No data found for college: {college_name}")
        return

    # Both metrics are required below; report a missing one instead of
    # failing later with a KeyError.
    missing = [col for col in (placement_col, salary_col) if col not in college_data.columns]
    if missing:
        print(f"Cannot predict for {college_name}: missing column(s) {missing}")
        return

    # 2. Prepare data for time series analysis: datetime index, sorted by year
    college_data['Year'] = pd.to_datetime(college_data['Year'], format='%Y')
    college_data = college_data.set_index('Year').sort_index()
    # Round-trip through yearly periods back to timestamps.
    # NOTE(review): presumably done so the index carries a yearly frequency for
    # statsmodels — confirm.
    college_data.index = college_data.index.to_period('Y').to_timestamp('D')

    # 3. Strip the ' LPA' suffix and convert salary to float
    college_data[salary_col] = (
        college_data[salary_col].astype(str).str.replace(' LPA', '', regex=False).astype(float)
    )

    # 4./5. Forecast both metrics two steps ahead
    placement_preds = _forecast_next_two(college_data[placement_col], 'Placement Percentage')
    salary_preds = _forecast_next_two(college_data[salary_col], 'Median Salary')

    # Label the forecasts relative to the last observed year.
    # NOTE(review): assumes one observation per consecutive year, so that
    # "next step" means "next calendar year" — confirm against the data.
    last_year = college_data.index[-1].year
    forecast_years = [last_year + 1, last_year + 2]

    # 6. Print Predictions
    print(f"\n{college_name} Placement and Salary Predictions:")
    for year, placement, salary in zip(forecast_years, placement_preds, salary_preds):
        print(f"Year: {year}, Predicted Placement Percentage: {placement:.2f}%, Predicted Median Salary: {salary:.2f} LPA")

    # 7. Plotting: extend the history with the two forecast years
    future_index = pd.to_datetime([str(year) for year in forecast_years], format='%Y')
    all_data = college_data.reindex(college_data.index.union(future_index))

    for stamp, placement, salary in zip(future_index, placement_preds, salary_preds):
        all_data.loc[stamp, placement_col] = placement
        all_data.loc[stamp, salary_col] = salary

    fig, (ax_placement, ax_salary) = plt.subplots(1, 2, figsize=(12, 6))

    ax_placement.plot(all_data.index, all_data[placement_col], marker='o')
    ax_placement.set_title('Placement Percentage Trend')
    ax_placement.set_xlabel('Year')
    ax_placement.set_ylabel('Placement Percentage (%)')
    ax_placement.grid(True)

    ax_salary.plot(all_data.index, all_data[salary_col], marker='o', color='orange')
    ax_salary.set_title('Median Salary Trend')
    ax_salary.set_xlabel('Year')
    ax_salary.set_ylabel('Median Salary (LPA)')
    ax_salary.grid(True)

    fig.suptitle(f'Placement and Salary Trends for {college_name}', fontsize=16)
    fig.tight_layout()
    plt.show()

if __name__ == "__main__":
    # Trim surrounding whitespace so an accidental leading/trailing space does
    # not make the exact-match college lookup report "No data found".
    college_name = input("Enter college name: ").strip()
    predict_placement_and_salary(data, college_name)