In [None]:
from pathlib import Path

import pandas as pd
# Short dataset name -> raw Excel workbook it is loaded from.
file_paths = {
    "accomadation": "raw_data/accomadation_eng.xlsx",
    "courses": "raw_data/courses_eng_undergrad.xlsx",
    "extras": "raw_data/extras.xlsx",
    "faq": "raw_data/FAQ_Halil.xlsx",
    "how_to_register": "raw_data/How_will_you_register.xlsx",
    "registration_procedures": "raw_data/REGISTRATION_PROCEDURES.xlsx",
    "sport_clubs": "raw_data/sport_clubs.xlsx",
    "student_clubs": "raw_data/student_clubs_eng.xlsx",
}

# Read every sheet of every workbook: {dataset name: {sheet name: DataFrame}}.
# `sheet_name=None` makes `read_excel` return all sheets at once, and the file
# is closed as soon as the call returns. The previous version kept one open
# `pd.ExcelFile` handle per workbook for the lifetime of the kernel.
loaded_dataframes = {
    name: pd.read_excel(path, sheet_name=None) for name, path in file_paths.items()
}

# Sheet names of each workbook, in workbook order.
sheets = {name: list(frames) for name, frames in loaded_dataframes.items()}

In [None]:
# Merge the two accommodation columns (labelled 0 and 1) into a single
# "<column 0>: <column 1>" text column named "Context".
accomadation_sheet = loaded_dataframes["accomadation"]["Sheet1"]
accomadation_text = accomadation_sheet[0] + ": " + accomadation_sheet[1]
accomadation_df = accomadation_text.to_frame(name="Context")

In [None]:
# Work on a copy so the cached raw sheet in `loaded_dataframes` is never
# modified and this cell always starts from the same input when re-run.
courses_raw = loaded_dataframes["courses"]["data-1731101247619"].copy()
courses_raw.columns = courses_raw.columns.str.strip()

# Replace embedded line breaks in every text column with a space.
for col in courses_raw.select_dtypes(include=['object']).columns:
    courses_raw[col] = courses_raw[col].str.replace(r"\n", " ", regex=True)

# Drop the trailing column (it is not used below). The explicit copy means the
# new column added further down is written to an independent frame, not to a
# slice of `courses_raw`.
courses_fields = courses_raw.iloc[:, :-1].copy()


def create_course_context(row):
    """Format one course row as a four-line text block.

    Reads the 'course', 'Department', 'course_objective' and 'course_content'
    entries of `row` and returns them as labelled lines joined by newlines.
    """
    return (
        f"Course: {row['course']}\n"
        f"Department: {row['Department']}\n"
        f"Objective: {row['course_objective']}\n"
        f"Content: {row['course_content']}"
    )


courses_fields['context'] = courses_fields.apply(create_course_context, axis=1)

# Final output: one "Context" column holding the formatted text of each course.
courses_df = pd.DataFrame(courses_fields['context'].values, columns=["Context"])

In [None]:
# Copy the raw sheet instead of editing the cached frame in `loaded_dataframes`.
# The previous version renamed the cached frame's columns and appended a row to
# it in place, so each re-run of this cell stacked one more copy of that row on
# top of the output.
extras_body = loaded_dataframes["extras"]["Sheet1"].copy()
extras_body.columns = [0, 1]  # raises if the sheet does not have exactly two columns

# NOTE(review): the first output row is built from the literal labels
# "Type"/"Context", which is what the original cell produced on a fresh kernel.
# If the sheet has no header row, pandas has used its real first row as column
# labels and that row is missing from the output — confirm against
# raw_data/extras.xlsx.
label_row = pd.DataFrame([["Type", "Context"]], columns=[0, 1])
extras_rows = pd.concat([label_row, extras_body], ignore_index=True)

# Final output: one "Context" column of "<column 0>: \n<column 1>" strings.
extras_df = pd.DataFrame(
    (extras_rows[0] + ": \n" + extras_rows[1]).values,
    columns=["Context"],
)

In [None]:
# Copy first, then strip whitespace from the column names, and only then select
# the three needed columns. Selecting before stripping (as the cell did before)
# meant the strip could never rescue a padded header.
faq_raw = loaded_dataframes["faq"]["Sheet1"].copy()
faq_raw.columns = faq_raw.columns.str.strip()

# Explicit copy so the writes below go to an independent frame rather than to a
# column-subset slice of `faq_raw` (avoids SettingWithCopyWarning).
faq_fields = faq_raw[["link", "question", "answer"]].copy()

# Replace embedded line breaks in every text column with a space.
for col in faq_fields.select_dtypes(include=['object']).columns:
    faq_fields[col] = faq_fields[col].str.replace(r"\n", " ", regex=True)


def create_faq_context(row):
    """Format one FAQ row as labelled question / answer / link lines.

    Reads the 'question', 'answer' and 'link' entries of `row`.
    """
    return (
        f"FAQ Question: {row['question']}\n"
        f"FAQ Answer: {row['answer']}\n"
        f"FAQ Link: {row['link']}\n"
    )


faq_fields['context'] = faq_fields.apply(create_faq_context, axis=1)

# Final output: one "Context" column holding the formatted text of each FAQ entry.
faq_df = pd.DataFrame(faq_fields['context'].values, columns=["Context"])


In [None]:
# Copy before any edit so the cached raw sheet in `loaded_dataframes` stays
# untouched (the previous `.copy()` came only after the in-place changes).
how_to_register_raw = loaded_dataframes["how_to_register"]["Sheet1"].copy()
how_to_register_raw.columns = how_to_register_raw.columns.str.strip()

# Replace embedded line breaks in every text column with a space.
for col in how_to_register_raw.select_dtypes(include=['object']).columns:
    how_to_register_raw[col] = how_to_register_raw[col].str.replace(r"\n", " ", regex=True)


def create_how_to_register_context(row):
    """Format one "how to register" row as labelled question / answer / link lines.

    Reads the 'question', 'answer' and 'link' entries of `row`.
    """
    return (
        f"How to Register Question: {row['question']}\n"
        f"How to Register Answer: {row['answer']}\n"
        f"How to Register Link: {row['link']}\n"
    )


how_to_register_raw['context'] = how_to_register_raw.apply(
    create_how_to_register_context, axis=1
)

# Final output: one "Context" column holding the formatted text of each entry.
how_to_register_df = pd.DataFrame(how_to_register_raw['context'].values, columns=["Context"])


In [None]:
# Copy before any edit so the cached raw sheet in `loaded_dataframes` stays
# untouched (the previous `.copy()` came only after the in-place changes).
registration_procedures_raw = loaded_dataframes["registration_procedures"]["Sheet1"].copy()
registration_procedures_raw.columns = registration_procedures_raw.columns.str.strip()

# Replace embedded line breaks in every text column with a space.
for col in registration_procedures_raw.select_dtypes(include=['object']).columns:
    registration_procedures_raw[col] = (
        registration_procedures_raw[col].str.replace(r"\n", " ", regex=True)
    )


def create_registration_procedures_context(row):
    """Format one registration-procedures row as labelled question / answer lines.

    Reads the 'Question' and 'Answer' entries of `row`.
    """
    return (
        f"Registration Procedures Question: {row['Question']}\n"
        f"Registration Procedures Answer: {row['Answer']}\n"
    )


registration_procedures_raw['context'] = registration_procedures_raw.apply(
    create_registration_procedures_context, axis=1
)

# Final output: one "Context" column holding the formatted text of each entry.
registration_procedures_df = pd.DataFrame(
    registration_procedures_raw['context'].values, columns=["Context"]
)

In [None]:
sports_club_tr = loaded_dataframes["sport_clubs"]["Turkish"]
sports_club_en = loaded_dataframes["sport_clubs"]["English"]

# Stack the Turkish and English sheets. `concat` returns a new frame, so the
# cached sheets are not modified; `ignore_index=True` gives the result unique
# row labels instead of repeating each sheet's own 0..n-1 labels.
sports_club_all = pd.concat([sports_club_tr, sports_club_en], ignore_index=True)
sports_club_all.columns = sports_club_all.columns.str.strip()

# Replace embedded line breaks in every text column with a space.
for col in sports_club_all.select_dtypes(include=['object']).columns:
    sports_club_all[col] = sports_club_all[col].str.replace(r"\n", " ", regex=True)


def create_sports_club_context(row):
    """Format one sports-club row as labelled name / description lines.

    Reads the 'club_name' and 'description' entries of `row`.
    """
    return (
        f"Sports Club Name: {row['club_name']}\n"
        f"Sports Club Description: {row['description']}\n"
    )


sports_club_all['context'] = sports_club_all.apply(create_sports_club_context, axis=1)

# Final output: one "Context" column, Turkish rows first, then English rows.
sports_club = pd.DataFrame(sports_club_all['context'].values, columns=["Context"])

In [None]:
# Copy before any edit so the cached raw sheet in `loaded_dataframes` stays
# untouched (the previous `.copy()` came only after the in-place changes).
student_club_raw = loaded_dataframes["student_clubs"]["Sheet1"].copy()
student_club_raw.columns = student_club_raw.columns.str.strip()

# Replace embedded line breaks in every text column with a space.
for col in student_club_raw.select_dtypes(include=['object']).columns:
    student_club_raw[col] = student_club_raw[col].str.replace(r"\n", " ", regex=True)


def create_student_club_context(row):
    """Format one student-club row as labelled name / description lines.

    Reads the 'Student Club' and 'information' entries of `row`.
    """
    return (
        f"Student Club Name: {row['Student Club']}\n"
        f"Student Club Description: {row['information']}\n"
    )


student_club_raw['context'] = student_club_raw.apply(create_student_club_context, axis=1)

# Final output: one "Context" column holding the formatted text of each club.
student_club = pd.DataFrame(student_club_raw['context'].values, columns=["Context"])

In [133]:
# Output workbook for each processed dataset.
output_paths = {
    "accomadation": "data/accomadation.xlsx",
    "courses": "data/courses.xlsx",
    "extras": "data/extras.xlsx",
    "faq": "data/faq.xlsx",
    "how_to_register": "data/how_to_register.xlsx",
    "registration_procedures": "data/registration_procedures.xlsx",
    "sports_club": "data/sports_club.xlsx",
    "student_club": "data/student_club.xlsx"
}

# Processed single-column ("Context") frames, keyed like `output_paths`.
frames_to_export = {
    "accomadation": accomadation_df,
    "courses": courses_df,
    "extras": extras_df,
    "faq": faq_df,
    "how_to_register": how_to_register_df,
    "registration_procedures": registration_procedures_df,
    "sports_club": sports_club,
    "student_club": student_club,
}

for name, frame in frames_to_export.items():
    target = output_paths[name]
    # `to_excel` does not create missing folders, so make sure the output
    # directory exists on a fresh checkout before writing.
    Path(target).parent.mkdir(parents=True, exist_ok=True)
    frame.to_excel(target, index=False)
