In [30]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.api as sm

In [43]:
# add spring 2024
CORE_LIST = ['101', '128', '261', '262', '200', '220', '274', '306', '341', '358', '400', '403', '406', '442']
NUM_LIST = ['Act']
file = 'Spring24.csv'
sem_df = pd.read_csv('../schedules/' + file)
# change CrseNum to string
sem_df['CrseNum'] = sem_df['CrseNum'].astype(str)

# filter to only CrseNum in CORE_LIST
core_df = sem_df[sem_df['CrseNum'].isin(CORE_LIST)]

# set all columns in NUM_LIST to int
for col in NUM_LIST:
    core_df.loc[:,col] = core_df[col].astype(int)

# group by CrseNum and sum across NUM_LIST
core_df = core_df.groupby(['CrseNum','Title']).sum().reset_index()

# drop unncessary columns
core_df = core_df[NUM_LIST + ['CrseNum', 'Title']]

# add a column for the semester
core_df['Year_Semester'] = 'Spring 2024'
core_df['Class'] = 'CSCI' + core_df['CrseNum']
core_df = core_df.drop(['CrseNum', 'Title'], axis=1)
core_df.rename(columns={"Act": "Taken"}, inplace=True)

display(core_df)

Unnamed: 0,Taken,Year_Semester,Class
0,572,Spring 2024,CSCI128
1,302,Spring 2024,CSCI200
2,200,Spring 2024,CSCI220
3,106,Spring 2024,CSCI274
4,168,Spring 2024,CSCI306
5,170,Spring 2024,CSCI341
6,194,Spring 2024,CSCI358
7,111,Spring 2024,CSCI400
8,160,Spring 2024,CSCI403
9,222,Spring 2024,CSCI406


In [57]:
taken = pd.read_csv('../data/taken.csv')
# eligible = pd.read_csv('../data/eligible.csv')

taken = taken[~taken['Year_Semester'].str.contains('Summer')]
taken = taken[~taken['Year_Semester'].str.contains('Fall 2024')]
taken = taken[taken['Taken'] != 0]
eligible = eligible[~eligible['Year_Semester'].str.contains('Summer')]
eligible = eligible[~eligible['Year_Semester'].str.contains('Fall 2024')]

taken = pd.concat([taken, core_df])
taken.reset_index(inplace=True)

taken['id'] = taken.index
eligible['id'] = eligible.index

taken.drop('index', axis=1, inplace=True)

# merged_df = taken.set_index('id').combine_first(eligible.set_index('id'))
display(taken)

Unnamed: 0,Year_Semester,Taken,Class,id
0,Fall 2018,306,CSCI128,0
1,Spring 2019,81,CSCI128,1
2,Fall 2017,248,CSCI128,2
3,Spring 2020,213,CSCI128,3
4,Fall 2020,407,CSCI128,4
...,...,...,...,...
193,Spring 2024,194,CSCI358,193
194,Spring 2024,111,CSCI400,194
195,Spring 2024,160,CSCI403,195
196,Spring 2024,222,CSCI406,196


In [58]:
semester_mapping = {
    'Spring': 1,
    'Fall': 2,
}

def increment_semester_sequence(row):
    semester, year = row['Year_Semester'].split()
    sequence = (int(year) - 2017) * 2 + semester_mapping[semester] - 1
    return sequence

taken['semester_sequence'] = taken.apply(increment_semester_sequence, axis=1)
    
# merged_df.loc[:, 'semester_sequence'] = merged_df.apply(lambda row: convert_to_sequence(row['Year_Semester']), axis=1)
unique_classes = taken['semester_sequence'].unique()
display(taken[taken['Year_Semester'] == 'Spring 2024'])
print(sorted(unique_classes))

Unnamed: 0,Year_Semester,Taken,Class,id,semester_sequence
187,Spring 2024,572,CSCI128,187,14
188,Spring 2024,302,CSCI200,188,14
189,Spring 2024,200,CSCI220,189,14
190,Spring 2024,106,CSCI274,190,14
191,Spring 2024,168,CSCI306,191,14
192,Spring 2024,170,CSCI341,192,14
193,Spring 2024,194,CSCI358,193,14
194,Spring 2024,111,CSCI400,194,14
195,Spring 2024,160,CSCI403,195,14
196,Spring 2024,222,CSCI406,196,14


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]


In [70]:
unique_classes = merged_df['Class'].unique()

forecasts = {}
conf_ints = {}

for class_name in unique_classes:
    class_data = taken[taken['Class'] == class_name].copy()  # Explicitly create a copy
    
    # Set 'semester_sequence' as the index
    class_data.set_index('semester_sequence', inplace=True)
    
    # Sort the DataFrame by the index
    class_data.sort_index(inplace=True)
    
    # Assuming you have the 'Taken' values as a time series for each class
    taken_series = class_data['Taken']
    
    # Fit ARIMA model
    model = ARIMA(taken_series, order=(2, 1, 0))
    model_fit = model.fit()
    
    # Forecast the next semester
    forecast = model_fit.get_forecast(steps=1)
    yhat = forecast.predicted_mean
    yhat_conf_int = forecast.conf_int(alpha=0.05)
    
    forecasts[class_name] = yhat
    conf_ints[class_name] = yhat_conf_int

# Print forecasts for each class
for class_name, forecast in forecasts.items():
    print("Forecast for", class_name, "in next semester:", forecast)
    print("95% Confidence Interval:", conf_ints[class_name])

Forecast for CSCI128 in next semester: 14    751.202774
dtype: float64
95% Confidence Interval:     lower Taken  upper Taken
14   562.931106   939.474441
Forecast for CSCI200 in next semester: 14    305.195104
dtype: float64
95% Confidence Interval:     lower Taken  upper Taken
14   195.043677   415.346531
Forecast for CSCI220 in next semester: 14    217.641314
dtype: float64
95% Confidence Interval:     lower Taken  upper Taken
14   119.949861   315.332766
Forecast for CSCI274 in next semester: 14    170.623386
dtype: float64
95% Confidence Interval:     lower Taken  upper Taken
14    92.879267   248.367504
Forecast for CSCI306 in next semester: 14    166.063642
dtype: float64
95% Confidence Interval:     lower Taken  upper Taken
14   115.635744    216.49154
Forecast for CSCI341 in next semester: 14    162.819617
dtype: float64
95% Confidence Interval:     lower Taken  upper Taken
14   116.496876   209.142358
Forecast for CSCI358 in next semester: 14    191.774636
dtype: float64
95% C

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(d