In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Report, for every (line_of_business, delivery_unit, practice_unit) combination
# seen in the window, the calendar months in which no job ended.
#
# Expects a DataFrame called `df` with the columns 'line_of_business',
# 'delivery_unit', 'practice_unit', and 'job_end_date'.

KEY_COLUMNS = ['line_of_business', 'delivery_unit', 'practice_unit']
N_MONTHS = 24  # length of the look-back window, in whole calendar months

# Convert job_end_date to datetime
df['job_end_date'] = pd.to_datetime(df['job_end_date'])

# The window is the N_MONTHS calendar months ending with the month of end_date
# (October 2022 through September 2024). Working in whole months avoids the
# leap-year drift of "2 * 365 days".
end_date = datetime(2024, 9, 30)
window_months = pd.period_range(end=pd.Period(end_date, freq='M'), periods=N_MONTHS, freq='M')
start_date = window_months[0].start_time.to_pydatetime()

# Keep rows whose end month falls inside the window. Comparing months (rather
# than `<= end_date` at midnight) also keeps rows on the last day that carry a
# time-of-day. `.assign` returns a new frame, so `df` is not touched and no
# SettingWithCopyWarning is raised.
job_month = df['job_end_date'].dt.to_period('M')
in_window = (job_month >= window_months[0]) & (job_month <= window_months[-1])
df_filtered = df.loc[in_window].assign(year_month=job_month[in_window])

# Create a DataFrame with all months in the window (month-start timestamps)
all_months = pd.DataFrame({'month': window_months.to_timestamp()})

# Create a DataFrame with all combinations of line_of_business, delivery_unit, practice_unit, and month
unique_combinations = df_filtered[KEY_COLUMNS].drop_duplicates()
all_combinations = unique_combinations.merge(all_months, how='cross')

# One row per combination and month that actually has data. `month` is derived
# from `year_month` as a month-start timestamp so both sides of the join share
# the same dtype; a datetime64 key cannot be matched against a Period key.
observed_months = (
    df_filtered[KEY_COLUMNS + ['year_month']]
    .drop_duplicates()
    .assign(month=lambda frame: frame['year_month'].dt.to_timestamp())
)

# Check for missing months: left-join the expected grid onto the observed months
missing_months = all_combinations.merge(
    observed_months,
    how='left',
    on=KEY_COLUMNS + ['month'],
)

# Identify missing entries: grid rows with no observed counterpart have no year_month
missing_months = missing_months[missing_months['year_month'].isna()]

# Display missing months
print("Missing months for each line of business, delivery unit, and practice unit:")
print(missing_months[KEY_COLUMNS + ['month']])


In [None]:
!pip install fosforml 
!pip install fosforio

Collecting fosforml
[?25l  Downloading https://files.pythonhosted.org/packages/94/2e/3613fd0ccdbf3709dec86f87fe7624737a6f08bd1a813c88e65e7352dfde/fosforml-1.1.8-py3-none-any.whl (42kB)
[K     |████████████████████████████████| 51kB 6.6MB/s eta 0:00:011
[?25hCollecting scikit-learn==1.3.2
[?25l  Downloading https://files.pythonhosted.org/packages/25/89/dce01a35d354159dcc901e3c7e7eb3fe98de5cb3639c6cd39518d8830caa/scikit_learn-1.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9MB)
[K     |████████████████████████████████| 10.9MB 28.5MB/s eta 0:00:01
[?25hCollecting snowflake-ml-python==1.5.0; python_version <= "3.9"
[?25l  Downloading https://files.pythonhosted.org/packages/80/72/c0fa5a9bc811a59a5a1c7113ff89676ed1629d7d6463db8c1a8c97a8b5f6/snowflake_ml_python-1.5.0-py3-none-any.whl (1.9MB)
[K     |████████████████████████████████| 1.9MB 82.2MB/s eta 0:00:01
[?25hCollecting cloudpickle==2.2.1
  Downloading https://files.pythonhosted.org/packages/15/80/44286939ca21