# In [1]:
import pandas as pd
from datetime import datetime, time

# Cut-off date (naive datetime at midnight) separating "before corona" from "after corona".
start_of_corona = datetime(year=2020, month=2, day=19)

def load_data(filepath):
    """Read the CSV at *filepath* into a DataFrame using pandas' default parser settings."""
    frame = pd.read_csv(filepath)
    return frame


# remote_growth = ((number of remote offers published on/after the corona start date)
#                  - (number of remote offers published before it)) / (number of all offers)
def find_remote_growth(df):
    """Return the change in remote-offer counts across the corona start date.

    The value is ``(post - pre) / len(df)`` where ``post`` is the number of
    remote offers published on or after ``start_of_corona`` and ``pre`` is the
    number of remote offers published before it.

    Args:
        df: DataFrame with a "Publish Time" column that ``pd.to_datetime`` can
            parse and an "Offers Remote" column; a row counts as remote when
            its value compares equal to ``True``.

    Returns:
        The growth ratio as a float, or ``0.0`` when ``df`` has no rows.
    """
    total_len = len(df)
    if total_len == 0:
        # No offers at all: report no growth instead of dividing by zero.
        return 0.0

    # Parse the timestamps once and reuse them for both comparisons.
    publish_time = pd.to_datetime(df["Publish Time"])
    # Element-wise comparison on purpose; a plain truthiness test does not work on a Series.
    is_remote = df["Offers Remote"] == True  # noqa: E712

    # Both comparisons are kept explicit so unparseable/missing timestamps (NaT)
    # fall into neither group, exactly as before.
    pre_corona_remote = int((is_remote & (publish_time < start_of_corona)).sum())
    post_corona_remote = int((is_remote & (publish_time >= start_of_corona)).sum())

    return (post_corona_remote - pre_corona_remote) / total_len

# Count the number of posts in each time-of-day category
def count_posts(df):
    """Count posts per time-of-day bucket.

    Buckets are half-open intervals on the publish time of day:
    "dawn" [00:00, 06:00), "morning" [06:00, 12:00), "noon" [12:00, 18:00)
    and "night" [18:00, 24:00). Exactly midnight therefore counts as "dawn",
    and times such as 05:59:59 stay in the bucket they belong to.

    NOTE: the "Publish Time" column of ``df`` is overwritten in place with the
    bucket labels, so the original timestamps are no longer available on
    ``df`` after this call (and calling this twice on the same frame will fail
    to parse the labels as dates).

    Args:
        df: DataFrame with a "Publish Time" column that ``pd.to_datetime`` can
            parse.

    Returns:
        A Series indexed by bucket label holding the number of posts in each
        bucket that occurs in the data.
    """
    # Exclusive upper bounds of the first three buckets.
    morning_start = time(hour=6)
    noon_start = time(hour=12)
    night_start = time(hour=18)

    def discretize_time(p_time):
        """Map a single timestamp to its bucket label."""
        time_of_day = p_time.time()
        if time_of_day < morning_start:
            return "dawn"
        if time_of_day < noon_start:
            return "morning"
        if time_of_day < night_start:
            return "noon"
        return "night"

    df['Publish Time'] = pd.to_datetime(df["Publish Time"]).apply(discretize_time)

    discrete_time = df.groupby(by='Publish Time').size()
    return discrete_time


def data_salary(df):
    """Return the mean salary per level for data/AI-related job titles.

    A post is selected when its lower-cased title contains any of the English
    or Persian keywords below. Posts whose title is missing are skipped
    instead of breaking the boolean mask.

    NOTE: the "Title" column of ``df`` is lower-cased in place.

    Args:
        df: DataFrame with "Title", "Level" and "Salary" columns.

    Returns:
        A Series indexed by "Level" holding the mean "Salary" of the selected
        posts.
    """
    keywords = [
        'machine learning',
        'machinelearning',
        'داده',
        'data scientist',
        'datascientist',
        'هوش مصنوعی',
        'پردازش ویدئو',
        'data engineer',
        'dataengineer',
        'بینایی ماشین',
        'یادگیری ماشین',
        'deep learning',
        'deeplearning',
        'یادگیری عمیق',
        'دیتاساینتیست',
        'artificial intelligence',
        'artificialintelligence',
        'هوش',
        'data analysis',
        'dataanalysis',
        'پردازش تصویر',
        'شبکه‌های عمیق',
        'علم‌داده',
    ]

    # The keywords contain no regex metacharacters, so joining them with "|"
    # yields a plain alternation pattern.
    pattern = "|".join(keywords)

    df["Title"] = df["Title"].str.lower()
    # na=False: a missing title yields False rather than NaN, which would make
    # the boolean indexing below raise.
    is_data_job = df["Title"].str.contains(pattern, na=False)
    filtered_df = df[is_data_job]

    data_mean = filtered_df.groupby(by="Level")["Salary"].mean()
    return data_mean



def analize_job_data(filepath):
    """Load the job-posts CSV at *filepath*, run the analyses and return the DataFrame.

    The remote-growth ratio, the per-time-bucket post counts and the mean
    data-job salary per level are computed here but are not part of the
    return value; only the DataFrame is handed back to the caller.
    """
    job_posts = load_data(filepath)

    # find_remote_growth parses "Publish Time" as dates, so it runs before
    # count_posts, which writes to that column.
    remote_growth = find_remote_growth(job_posts)
    discrete_time = count_posts(job_posts)
    data_mean = data_salary(job_posts)

    return job_posts

# Run the analysis on the processed job-posts CSV as soon as this code executes.
# The path is relative, so it is resolved against the current working directory.
df = analize_job_data("../data/processed/job_posts_processed.csv")

