In [1]:
import sys
import os
import pandas as pd


# Add project root to sys.path in Jupyter or interactive session.
# The root is resolved three directory levels above the current working
# directory, so the kernel's working directory determines what gets added.
# The membership check makes the cell idempotent: re-running it no longer
# appends the same entry to sys.path again.
_project_root = os.path.abspath(os.path.join(os.getcwd(), "../../../"))
if _project_root not in sys.path:
    sys.path.append(_project_root)

from config.path_config import DATA_DIR, PROJECT_DIR, DEPENDENCIES_DIR
from src.helper_functions.data_pre_processing.data_upload_processor.processor import *
from src.helper_functions.database_upload.postgres_uploader import *

from dotenv import load_dotenv

In [2]:
# Mapping file that a later cell passes to load_full_survey_dataset.
# NOTE(review): this is a relative path — presumably resolved against the
# kernel's working directory by the helper; confirm before moving the notebook.
yaml_file_path = "testing_mapping.yaml"
print(yaml_file_path)

testing_mapping.yaml


In [3]:
# Read the two wave 10 Excel workbooks from DATA_DIR, in the same order as
# before: the raw responses first, then the data map.
raw_data, data_map = (
    pd.read_excel(os.path.join(DATA_DIR, workbook_name))
    for workbook_name in ("wave_10_raw_data.xlsx", "wave_10_data_map.xlsx")
)

In [4]:
# Load environment variables from the .env file kept in DEPENDENCIES_DIR and
# read the OpenAI key from the process environment.
env_path = os.path.join(DEPENDENCIES_DIR, ".env")
load_dotenv(env_path)
api_key = os.getenv("OPENAI_API_KEY")

# os.getenv returns None when the variable is absent. Fail here with a clear
# message rather than letting a missing/empty key surface later as an opaque
# error inside the pipeline.
if not api_key:
    raise RuntimeError(
        f"OPENAI_API_KEY is not set; define it in the environment or in {env_path}"
    )

In [5]:
# Run the question-data pipeline on the wave 10 data map and raw responses,
# tagging the run with the 'wave_10' identifier. The result is a dict whose
# keys are listed in a later cell.
# NOTE(review): the recorded output suggests api_key is forwarded to
# OpenAIEmbeddings inside the pipeline — confirm in the helper module.
data_dict = run_question_data_pipeline(data_map, raw_data, api_key,'wave_10')

  text_splitter = SemanticChunker(OpenAIEmbeddings(api_key=api_key), breakpoint_threshold_type="standard_deviation")


In [6]:
# Build the Postgres engine used by every upload/load cell below.
# Each setting can be overridden through an environment variable (for example
# from the .env file) so real credentials do not have to live in the notebook;
# the fallbacks reproduce the previous hard-coded local-development values.
# NOTE(review): the argument order (user, password, host, port, database) is
# inferred from the original literals — confirm against create_postgres_engine.
engine = create_postgres_engine(
    os.getenv("POSTGRES_USER", "postgres"),
    os.getenv("POSTGRES_PASSWORD", "postgres"),
    os.getenv("POSTGRES_HOST", "localhost"),
    int(os.getenv("POSTGRES_PORT", "5432")),  # keep the port an int, as before
    os.getenv("POSTGRES_DB", "cl_survey_data"),
)

In [7]:
# List the keys of the dict returned by the pipeline to see which artefacts
# are available for upload.
data_dict.keys()

dict_keys(['question_guide', 'mapped_data', 'type_subtype', 'question_dict', 'embeddings_metadata_df', 'embedding_df'])

In [8]:
# Single source of truth for the wave identifier: it prefixes every table name
# written below and is also passed to process_raw_data, so changing the wave
# only requires editing this one line.
wave_name = 'wave_10'

# Upload the pipeline artefacts, one call per artefact, in the original order.
# The call forms (positional vs. keyword) are kept exactly as they were.
push_dataframe_to_postgres_db(data_dict['embeddings_metadata_df'], f'{wave_name}_embeddings_metadata', engine)
push_dataframe_to_postgres_db(data_dict['type_subtype'], file_name=f'{wave_name}_type_subtype', engine=engine)
push_dataframe_to_postgres_db(data_dict['question_guide'], f'{wave_name}_question_guide', engine)

# Hand the mapped raw responses to process_raw_data with the 'Respondent'
# column as the response id.
# NOTE(review): chunk_size=60 presumably controls how the data is split for
# upload — confirm in the helper.
process_raw_data(data_dict['mapped_data'], wave_name, engine, response_id_col='Respondent', chunk_size=60)

2025-04-11 17:48:36.224 
  command:

    streamlit run C:\Users\72670\AppData\Roaming\Python\Python313\site-packages\ipykernel_launcher.py [ARGUMENTS]


In [13]:
# Load the 'wave_10' survey dataset through the engine, using the YAML mapping
# file defined in an earlier cell. The result is a dict; later cells list its
# keys and display its entries.
# NOTE(review): presumably this reads back the tables uploaded above, and
# max_chunks=100 presumably caps how many raw-data chunks are fetched —
# confirm both in the helper.
# NOTE(review): `a` is referenced by the inspection cells that follow; rename
# it everywhere at once if a more descriptive name is introduced.
a = load_full_survey_dataset(engine, 'wave_10', yaml_file_path, max_chunks=100)



In [18]:
# List which entries the loaded dataset dict contains.
a.keys()

dict_keys(['mapped_raw_data', 'type_subtype', 'question_guide', 'embeddings_metadata'])

In [None]:
# Display the 'mapped_raw_data' entry of the loaded dataset.
# NOTE(review): this cell has no execution count and the output recorded under
# it has the Question_no./Question/Type/Sub-type columns, identical to the
# 'type_subtype' output further down — the output looks stale; re-run the cell.
a['mapped_raw_data']

Unnamed: 0,Question_no.,Question,Type,Sub-type
0,Q4,What is your age?,Multiple choice,single-select
1,Q5,What is your <zip code|post code (Outward or f...,Multiple choice,single-select
2,Q20,Please describe what are you most excited / op...,Multiple choice,single-select
3,Q21,Please describe what are you most worried / pe...,Multiple choice,single-select
4,Q30,You said your household typically eats dinner ...,Matrix,single-select
...,...,...,...,...
73,Q75,How excited are you about AI’s ability to impr...,Matrix,single-select
74,Q76,How much do you trust the following if they we...,Matrix,single-select
75,Q77,How often do you use the following with the su...,Matrix,single-select
76,Q78,Please rank the top 3 potential downsides of A...,Matrix,single-select


In [15]:
# Display the 'question_guide' entry of the loaded dataset (question and
# answer codes with their text, per the recorded output).
a['question_guide']

Unnamed: 0,Question_code,Question_string,answer_code,answer_string
0,Q4,What is your age?,,What is your age?
1,Q5,What is your <zip code|post code (Outward or f...,,What is your <zip code|post code (Outward or f...
2,Q20,Please describe what are you most excited / op...,,Please describe what are you most excited / op...
3,Q21,Please describe what are you most worried / pe...,,Please describe what are you most worried / pe...
4,Q30,You said your household typically eats dinner ...,,You said your household typically eats dinner ...
...,...,...,...,...
620,Q79,Who would you trust the most to protect you fr...,4,"Cyber security firms (e.g., McAfee, Norton, Bi..."
621,Q79,Who would you trust the most to protect you fr...,5,Grassroots activist groups “Hacktivists” (e.g....
622,Q79,Who would you trust the most to protect you fr...,6,Myself or a family member
623,Q79,Who would you trust the most to protect you fr...,7,No one


In [16]:
# Display the 'embeddings_metadata' entry of the loaded dataset (one row per
# question chunk with a wave-prefixed primary_key, per the recorded output).
a['embeddings_metadata']

Unnamed: 0,question_code,question_text,question_with_answers,answer_options,source,chunk_text,primary_key
0,Q10,What is your current employment status?,What is your current employment status? Full-t...,Full-time employed in one job (working >30 hou...,wave_10,What is your current employment status? Full-t...,wave_10_0
1,Q11,What was your total household income before ta...,What was your total household income before ta...,"Less than <currency>25,000|| <currency>25,000 ...",wave_10,What was your total household income before ta...,wave_10_1
2,Q12,What was your total personal income before tax...,What was your total personal income before tax...,"Less than <currency>25,000|| <currency>25,000 ...",wave_10,What was your total personal income before tax...,wave_10_2
3,Q13,What is your household / family situation? Ple...,What is your household / family situation? Ple...,I have no children|| I have a child(ren) under...,wave_10,What is your household / family situation? Ple...,wave_10_3
4,Q14,Who do you live with? Please select all that a...,Who do you live with? Please select all that a...,Live alone|| Live with my partner|| Live with ...,wave_10,Who do you live with? Please select all that a...,wave_10_4
...,...,...,...,...,...,...,...
73,Q77,How often do you use the following with the su...,How often do you use the following with the su...,1\nNot at all|| 2|| 3|| 4|| 5\nVery often|| I ...,wave_10,How often do you use the following with the su...,wave_10_73
74,Q78,Please rank the top 3 potential downsides of A...,Please rank the top 3 potential downsides of A...,AI will replace my job|| AI will steal my iden...,wave_10,Please rank the top 3 potential downsides of A...,wave_10_74
75,Q79,Who would you trust the most to protect you fr...,Who would you trust the most to protect you fr...,Governments|| International organizations (e.g...,wave_10,Who would you trust the most to protect you fr...,wave_10_75
76,Q8,If you relocated (moved to a new place and est...,If you relocated (moved to a new place and est...,I have not relocated in the past year|| I was ...,wave_10,If you relocated (moved to a new place and est...,wave_10_76


In [17]:
# Display the 'type_subtype' entry of the loaded dataset (question type and
# sub-type per question, per the recorded output).
a['type_subtype']

Unnamed: 0,Question_no.,Question,Type,Sub-type
0,Q4,What is your age?,Multiple choice,single-select
1,Q5,What is your <zip code|post code (Outward or f...,Multiple choice,single-select
2,Q20,Please describe what are you most excited / op...,Multiple choice,single-select
3,Q21,Please describe what are you most worried / pe...,Multiple choice,single-select
4,Q30,You said your household typically eats dinner ...,Matrix,single-select
...,...,...,...,...
73,Q75,How excited are you about AI’s ability to impr...,Matrix,single-select
74,Q76,How much do you trust the following if they we...,Matrix,single-select
75,Q77,How often do you use the following with the su...,Matrix,single-select
76,Q78,Please rank the top 3 potential downsides of A...,Matrix,single-select


In [19]:
# Ask the helper which waves are reachable through the engine; the recorded
# output is ['wave_10'].
# NOTE(review): presumably derived from the table names in the database —
# confirm in the helper.
get_wave_n_list(engine)

['wave_10']