In [17]:
import plotly.express as px
from scipy.stats import mannwhitneyu
import duckdb
import pandas as pd
from pathlib import Path

In [19]:
# Helper that builds the path to any CSV file in data/raw from its file name.
def get_csv_path(filename):
    """
    Build the location of ``filename`` inside the project's data/raw folder.

    The project root is the current working directory when that directory
    contains a ``data`` entry; otherwise the parent of the working directory
    is used instead.

    Parameters
    ----------
    filename : str
        Name of the CSV file (joined onto ``<root>/data/raw``).

    Returns
    -------
    pathlib.Path
        ``<root>/data/raw/<filename>``. The path is not checked for existence.
    """
    here = Path.cwd()
    if (here / 'data').exists():
        base_dir = here
    else:
        base_dir = here.parent
    return base_dir.joinpath('data', 'raw', filename)

# Example usage:
# csv_path = get_csv_path('survey_data.csv')
# df = pd.read_csv(csv_path)
# df.head()

# For DuckDB:
def _sql_literal(value):
    """Render a Python scalar (or list/tuple of scalars) as a SQL literal."""
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return 'true' if value else 'false'
    if isinstance(value, (int, float)):
        return str(value)
    if isinstance(value, (list, tuple)):
        return '[' + ', '.join(_sql_literal(item) for item in value) + ']'
    # Anything else is emitted as a quoted string; embedded single quotes are
    # doubled so they cannot terminate the literal early.
    return "'" + str(value).replace("'", "''") + "'"


def duckdb_read_csv(filename, **kwargs):
    """
    Load a CSV file from data/raw into a DataFrame via DuckDB's read_csv_auto.

    Parameters
    ----------
    filename : str
        Name of the CSV file; resolved with ``get_csv_path``.
    **kwargs
        Extra ``read_csv_auto`` options, rendered as ``name=value`` in the
        query. They override the defaults used here (``delim=';'``,
        ``decimal_separator=','``, ``nullstr=' '``). Passing ``None`` for an
        option drops it so DuckDB falls back to its own default.

    Returns
    -------
    DataFrame or None
        The query result converted with ``.to_df()``, or ``None`` when the
        load fails (the error is printed rather than raised).
    """
    csv_path = get_csv_path(filename)
    try:
        options = {'delim': ';', 'decimal_separator': ',', 'nullstr': ' '}
        options.update(kwargs)

        # The file path is the first (positional) argument of read_csv_auto.
        arguments = [_sql_literal(csv_path.as_posix())]
        for name, value in options.items():
            if value is None:
                continue
            # Option names are spliced into the SQL text, so only accept
            # plain identifiers.
            if not name.isidentifier():
                raise ValueError(f"invalid read_csv_auto option name: {name!r}")
            arguments.append(f"{name}={_sql_literal(value)}")

        argument_sql = ',\n        '.join(arguments)
        query = f"""
    SELECT * FROM read_csv_auto(
        {argument_sql}
    )
    """
        df = duckdb.query(query).to_df()
    except Exception as e:
        print(f"❌ Failed to load {filename}: {e}")
        return None

    # Report success only once the data has actually been read.
    print(f"✅ {csv_path} loaded successfully.")
    return df

# Example usage: load the survey responses with the semicolon-delimited reader
# defined above, then preview the first rows as the cell's output.
# NOTE(review): duckdb_read_csv returns None when loading fails, in which case
# the .head() call below raises AttributeError.
duck_df = duckdb_read_csv('What_does_it_take_to_generate_new_growth_Survey_data.csv')
duck_df.head()



✅ c:\Users\mashel\Desktop\ml-scaffolds\ml-platform\data\raw\What_does_it_take_to_generate_new_growth_Survey_data.csv loaded successfully.


Unnamed: 0,Growth_Firm,question_2_row_1_transformed,question_2_row_2_transformed,question_3_row_1,question_3_row_2,question_3_row_3,question_3_row_4,question_3_row_5,question_3_row_6,question_3_row_7,...,question_5_row_4,question_5_row_5,question_5_row_6,question_5_row_7,question_5_row_8,question_5_row_9,question_5_row_10,question_6_row_1,question_6_row_2,question_7_row_1
0,0,35.135135,50.750939,4,5,5,4,3,3,4,...,4,2,4,2,3,2.0,5.0,4,5,1
1,0,23.018043,51.1822,5,4,4,4,4,4,4,...,3,4,3,3,3,4.0,3.0,5,4,1
2,0,86.640472,62.932639,3,4,4,4,4,3,4,...,5,4,4,4,4,,,5,3,1
3,0,17.647059,39.130435,3,4,5,4,4,4,5,...,3,3,4,4,4,3.0,3.0,3,3,1
4,0,60.0,32.802125,4,4,4,4,3,4,4,...,4,2,4,2,3,3.0,4.0,5,2,2


In [29]:
# Get the full path to the survey-questions CSV under data/raw
csv_path = get_csv_path('survey_questions.csv')

# Write and run the SQL query. No explicit options are passed to
# read_csv_auto here, unlike duckdb_read_csv which fixes delim/decimal/nullstr.
# NOTE(review): the path is interpolated into the SQL text unescaped; a path
# containing a single quote would break the query.
query = f"""
SELECT * FROM read_csv_auto('{csv_path.as_posix()}')
"""
# Convert the query result to a DataFrame and preview the first rows.
survey = duckdb.query(query).to_df()
survey.head()

Unnamed: 0,column,question,row,section,title,response_type
0,question_2_row_1_transformed,2,1,estimated growth,Expected employee count in five years (as a pe...,numeric
1,question_2_row_2_transformed,2,2,estimated growth,Expected revenue in five years (as a percent f...,numeric
2,question_3_row_1,3,1,company culture,Employees are encouraged to be creative,agree_disagree
3,question_3_row_2,3,2,company culture,Managers are expected to be creative problem s...,agree_disagree
4,question_3_row_3,3,3,company culture,Employees' ability to function creatively is r...,agree_disagree
