## Set up

In [17]:
#imports
import os, sys, json, datetime  # Provides OS-dependent functionality, system-specific parameters, JSON handling, and date/time manipulation
import pandas as pd             # Provides data structures and data analysis tools
import numpy as np              # Supports large, multi-dimensional arrays and matrices
import requests
import time
import glob
import re
from tqdm import tqdm
import shutil

In [18]:
def classify_response(j):
    
    if isinstance(j, (int, float)):
        if isinstance(j, float) and str(j) == 'nan':
            return np.nan         
        elif isinstance(j, (int, float)):
            # print(j)
            return 'quantitative'
    elif re.search(r'^yes|^no', str(j).lower().strip()):
        return "bool"
    else:        
        return 'qualitative'

### Just for ECLS  

####
ecls_survey_data = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\surveys\survey files to clean up\edited\ECLS_evaluation_2024.xlsx"
ecls_df = pd.read_excel(ecls_survey_data)


####
#what actions are you prepared to take
sel_col = ecls_df.columns[8]

all_resp = len(ecls_df)

q8 = ecls_df[~ecls_df[sel_col].isna()]
q8_resp = len(q8)

response_rate = q8_resp/all_resp

q8_share = ecls_df[ecls_df[sel_col].str.contains('Sharing ideas', na=False)]
q8_identify = ecls_df[ecls_df[sel_col].str.contains('Identify clear', na=False)]
q8_committee = ecls_df[ecls_df[sel_col].str.contains('Create a ', na=False)]
q8_build = ecls_df[ecls_df[sel_col].str.contains('Build consensus', na=False)]
q8_propose = ecls_df[ecls_df[sel_col].str.contains('Propose policy actions', na=False)]

counts = {"q8_share": q8_share, "q8_identify": q8_identify, "q8_committee": q8_committee, "q8_build":q8_build, "q8_propose": q8_propose}

q8_dict = {}
for k,c in counts.items():
    n = len(c)
    q8_dict[k] = f'{round((n/all_resp), 2) * 100}%'



for k,v in q8_dict.items():
    print(k,v)
    print('\n')

####
q1 = ecls_df.columns[0]
q2 = ecls_df.columns[1]
q3 = ecls_df.columns[2]
q4 = ecls_df.columns[3]
q5 = ecls_df.columns[4]
q6 = ecls_df.columns[5]
q7 = ecls_df.columns[6]


####likert q's
for ic, col in enumerate(ecls_df.columns):
    
    num_resp = len(ecls_df)
    if ic == 0: 
        continue
    elif ic == 1:
        yes = ecls_df[ecls_df[col].str.contains("Yes", na=False)]
        yes_num = len(yes)
        yes_rr = round((yes_num/num_resp)*100, 2)
        print(f'{yes_rr}% of respondents found ECLS to be useful')
    elif ic > 6:
        # print(col)
        continue
    else:
        new_col = pd.to_numeric(ecls_df[col], errors = "coerce")
        positive = len(new_col[new_col >= 3])
        percent_pos = round((positive/num_resp)*100, 2)
        # print(new_col)
        avg = round(new_col.mean(skipna=True), 2)
        # print(col)

        print(col)
        print(f'positive response: {percent_pos}%')
        print(f'average response: {avg}')
    

    


%%

## Data Gathering

In [19]:
#grab files from edited folder
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\surveys\survey files to clean up\edited')
edited_files = glob.glob('*.xlsx')

In [20]:
#survey formatter
all_surveys = {}
for file in tqdm(edited_files):
    
    # file = edited_files[0]
    df = pd.read_excel(file)
    anon_count = 0
    try:
        test = df['respondent'].head(1)
    except:
        print(file)
        break

    rows_with_all_missing = df.isnull().all(axis=1)
    n_missing_rows = len(rows_with_all_missing[rows_with_all_missing==True])
    if n_missing_rows > 0:
        print('!!!!!!!!!!!!!!!')
        print(n_missing_rows)
    # for i,j in reversed(enumerate(df['respondent'])):
    #     if str(j) != 'nan':
    #         last


    for i,j in enumerate(df['respondent']):
        if str(j) == 'nan' or len(str(j)) == 0 or str(j) == "" or 'anon' in str(j).lower():
            anon_count += 1
            new_value = f'Anon {anon_count}'
            df.loc[i,'respondent'] = new_value

    # df.loc[df[]]
    # print("##################")
    # print(file)
    # print(df.columns[0])
    # print("\n")
    dfs = []
    for name, data in df.items():
        if str(name) == 'respondent' or str(name).lower().strip() == 'name':
            index_col = str(name)
            continue
        
        n = len(data)
        
        respondents = df[index_col].to_list()
        # print(respondents)
        quest = [str(name)]*n
        response = data.to_list()
        df_app = pd.DataFrame({'respondent':respondents, 'question': quest, 'response': response})

        # if "NCCCS" in str(file):
        #     print(df_app.to_string())
        dfs.append(df_app)
    survey_data = pd.concat(dfs)
    all_surveys[file] = survey_data

100%|██████████| 25/25 [00:00<00:00, 87.99it/s]


In [21]:
#pull all survey data together
modified_dfs = []
for file, df in all_surveys.items():
    # print('##########')
    # print(file)
    event_name = re.sub(r'\s{2}', " ", re.sub(r'evaluation|eval|results', '', str(file).split(".", 1)[0].replace("_", " "), flags=re.IGNORECASE).strip(),flags=re.IGNORECASE)
    # print(event_name)
    df['event'] = event_name
    modified_dfs.append(df)    # Append modified DataFrame to the list
combined_df = pd.concat(modified_dfs, ignore_index=True)
combined_df


Unnamed: 0,respondent,question,response,event
0,Robin McConnell (Cnetral Piedmont),Avanza Convening 2,5,Avansa 2024
1,Sheena Ashley (Central Piedmont),Avanza Convening 2,4,Avansa 2024
2,Dr. Joevanne Estrada (Central Piedmont),Avanza Convening 2,5,Avansa 2024
3,Tracie Clark (Central Piedmont),Avanza Convening 2,5,Avansa 2024
4,Jennifer Recendez (Randolph CC),Avanza Convening 2,5,Avansa 2024
...,...,...,...,...
7280,Anon 4,Please share any other comments and/or feedbac...,,WV SLR 2024
7281,Anon 5,Please share any other comments and/or feedbac...,,WV SLR 2024
7282,Anon 6,Please share any other comments and/or feedbac...,,WV SLR 2024
7283,Anon 7,Please share any other comments and/or feedbac...,,WV SLR 2024


In [22]:

combined_df.loc[:,'event_type'] = np.nan
for i,j in enumerate(combined_df['event']):
    
    if re.search(r'\d{4}', str(j)):
        event_type = "annual"
    elif re.search(r'C\d{1,2}\s?(M\d{1,2})?', str(j)):
        event_type = "cohort"
    else:
        event_type = "other"
    combined_df.loc[i,'event_type'] = event_type

  combined_df.loc[i,'event_type'] = event_type


In [23]:
#use classify response
combined_df['data_type'] = combined_df['response'].apply(classify_response)
dict_creation = combined_df.loc[:,['question', 'data_type']]
dict_creation = dict_creation.drop_duplicates().reset_index(drop = True)
dict_creation = dict_creation.dropna(subset='data_type').reset_index(drop = True)

data_dict =dict(zip(dict_creation['question'], dict_creation['data_type']))

# for k,v in data_dict.items():
#     print(k)
#     print(v)

values = []
for i,(l,j) in enumerate(zip(combined_df['question'], combined_df['data_type'])):
    if str(j) == 'nan':
        # print('##############')
        # print(l)
        value = data_dict.get(l)
        # values.append(value)
        if value is None:
            value = np.nan
        combined_df.loc[i,'data_type'] = value
        # if value is None:
        #     print(str(l))

# print(list(set(values)))


In [24]:

q_type_dict = dict(zip(dict_creation['question'],dict_creation['question']))



In [25]:

combined_df

Unnamed: 0,respondent,question,response,event,event_type,data_type
0,Robin McConnell (Cnetral Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative
1,Sheena Ashley (Central Piedmont),Avanza Convening 2,4,Avansa 2024,annual,quantitative
2,Dr. Joevanne Estrada (Central Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative
3,Tracie Clark (Central Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative
4,Jennifer Recendez (Randolph CC),Avanza Convening 2,5,Avansa 2024,annual,quantitative
...,...,...,...,...,...,...
7280,Anon 4,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative
7281,Anon 5,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative
7282,Anon 6,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative
7283,Anon 7,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative


ooking for Net Promoter Questions

In [26]:
combined_df['nps'] = False
for i,j in enumerate(combined_df['question']):
    if re.search(r'recommend', str(j).lower()):
        combined_df.loc[i, ['nps']] = True
        
combined_df = combined_df.dropna(subset='data_type').reset_index(drop=True)
# combined_df

In [27]:
combined_df.loc[:,['qual_response']] = np.nan
combined_df.loc[:,['quant_response']] = np.nan
combined_df.loc[:,['bool_response']] = np.nan


for i,j in enumerate(combined_df['data_type']):
    response = combined_df.loc[i,'response']
    if j == 'quantitative':
        combined_df.loc[i,'quant_response'] = response
        # continue
    elif j == 'bool':
        combined_df.loc[i,'bool_response'] = response
        # continue
    elif j == 'qualitative':
        combined_df.loc[i,'qual_response'] = response
        # continue

anon_count = 1
for i,j in enumerate(combined_df['respondent']):
    # print(j)
    if str(j) == "nan":
        new_value = f'Anon {anon_count}'
        # print(new_value)
        anon_count += 1
        combined_df.loc[i,'respondent'] = new_value

# combined_df.loc[combined_df['respondent'].isna(), 'respondent'] = 'Anon'



  combined_df.loc[i,'bool_response'] = response
  combined_df.loc[i,'qual_response'] = response


In [28]:
# w_nps = []
# wo_nps = []
# for i,j in enumerate(combined_df['question']):
#     if re.search(r'recommend', str(j).lower()):
#         # print(j)
#         w_nps.append(combined_df.loc[i,'event'])
#     # else:
#         # print(j)
#         # wo_nps.append(combined_df.loc[i,'event'])



In [29]:

# w_nps = list(set(w_nps))
# wo_nps = list(set(wo_nps))
# # print(*w_nps, sep = "\n")

# test_nps = combined_df[~combined_df['event'].isin(w_nps)]
# test_nps = test_nps.loc[:,['question', 'event']]
# test_nps = test_nps.drop_duplicates()
# test_nps_events = test_nps.loc[:, 'event'].to_list()
# test_nps_events = list(set(test_nps_events))

# print(*test_nps_events, sep = '\n')

# # for i,j in enumerate(test_nps['event']):
# #     print(j)

In [30]:
#looking for net promoter questions
# combined_df.loc[combined_df['question'].str.contains('recommend ', regex=True), 'data_type'] = 'nps'

In [31]:
#convert 10 pt scale to likert

ten_point_events = ['Elevate NC C4 M4', 'HSPF C4 M2']

combined_df.loc[combined_df['event'].isin(ten_point_events), 'quant_response'] = combined_df['quant_response'].apply(lambda x: x/2 if x is not None else x)
combined_df

Unnamed: 0,respondent,question,response,event,event_type,data_type,nps,qual_response,quant_response,bool_response
0,Robin McConnell (Cnetral Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative,False,,5.0,
1,Sheena Ashley (Central Piedmont),Avanza Convening 2,4,Avansa 2024,annual,quantitative,False,,4.0,
2,Dr. Joevanne Estrada (Central Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative,False,,5.0,
3,Tracie Clark (Central Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative,False,,5.0,
4,Jennifer Recendez (Randolph CC),Avanza Convening 2,5,Avansa 2024,annual,quantitative,False,,5.0,
...,...,...,...,...,...,...,...,...,...,...
7204,Anon 4,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative,False,,,
7205,Anon 5,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative,False,,,
7206,Anon 6,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative,False,,,
7207,Anon 7,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative,False,,,


In [32]:
for i,j in enumerate(combined_df['quant_response']):
    if j > 5:
        print(combined_df.loc[i,'event'])


In [33]:
#export all data 
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\tableau\survey view\data sources')
combined_df.to_excel('master_survey_sheet.xlsx', index=False)
combined_df


Unnamed: 0,respondent,question,response,event,event_type,data_type,nps,qual_response,quant_response,bool_response
0,Robin McConnell (Cnetral Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative,False,,5.0,
1,Sheena Ashley (Central Piedmont),Avanza Convening 2,4,Avansa 2024,annual,quantitative,False,,4.0,
2,Dr. Joevanne Estrada (Central Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative,False,,5.0,
3,Tracie Clark (Central Piedmont),Avanza Convening 2,5,Avansa 2024,annual,quantitative,False,,5.0,
4,Jennifer Recendez (Randolph CC),Avanza Convening 2,5,Avansa 2024,annual,quantitative,False,,5.0,
...,...,...,...,...,...,...,...,...,...,...
7204,Anon 4,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative,False,,,
7205,Anon 5,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative,False,,,
7206,Anon 6,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative,False,,,
7207,Anon 7,Please share any other comments and/or feedbac...,,WV SLR 2024,annual,qualitative,False,,,


In [31]:
#split qualitative and quantitative data
survey_data_qual = combined_df[combined_df['data_type'] == 'qualitative']
survey_data_quant = combined_df[~(combined_df['data_type'] == 'qualitative')]
survey_data_bool = combined_df[(combined_df['data_type'] == 'bool')]


In [32]:

#export split files
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\tableau\survey view\data sources')
survey_data_bool.to_excel('survey_data_bool.xlsx', index=False)
survey_data_qual.to_excel('survey_data_qual.xlsx', index=False)
survey_data_quant.to_excel('survey_data_quant.xlsx', index=False)

## Rest of this is Defunct

In [None]:
#make all else qualitative
survey_data['data_type'] = survey_data['response'].apply(
    lambda x: 'quantitative' if isinstance(x, (int, float)) and str(x) != 'nan' else 'qualitative'
)
# survey_data.loc[isinstance(survey_data['response'], (int, float)), 'data_type'] = 'quantitative'
# survey_data.loc[isinstance(survey_data['response'], str), 'data_type'] = 'qualitative'

%%

In [None]:
survey_data['data_type'] = ''
for i,j in enumerate(survey_data['response']):
    # print(str(j))
    # if isinstance(j, float):
    #     try:
    #         survey_data.loc[i, "response"] = str(j).astype(int)
            
    
    #     except:
    #         print(survey_data.loc[i,:])
    #         print('not working')
    #         break

In [None]:
    survey_data.loc[isinstance(survey_data['response'], (int, float)), 'data_type'] = 'quantitative'
    survey_data.loc[isinstance(survey_data['response'], str), 'data_type'] = 'qualitative'
    # if isinstance(j,str):
    #     survey_data.loc[i, "data_type"] = 'qualitative'
    # elif isinstance(j, (int, float)):
    #     survey_data.loc[i, "data_type"] = 'quantative'
    # else:
    #     print(str(j) + " isn't either str or int")

%%

In [None]:
os.chdir(r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\surveys\survey files to clean up')
survey_data.to_excel('survey_data_test.xlsx')

%%

In [None]:
file = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\surveys\survey data\survey_data_monday_export_9_16.xlsx"
survey_update = pd.read_excel(file)

In [None]:
file_paths_surveys = survey_update['file_path'].to_list()

In [None]:
zero_files = []
path_error = []
files_list = {}
for i,f in enumerate(file_paths_surveys):
    # path = survey_update['file_path'].iloc[1]
    # print(path)
    path = f
    
    # print('###############')
    # print(survey_update['event'].iloc[i])
    # print(path)
    try:
        os.chdir(path)
    except:
        print('no')
        path_error.append(path)
        continue
    files = glob.glob('*.xlsx')

In [None]:
   
    if len(files) == 0:
        zero_files.append(path)
    elif len(files) == 2:
        for option in files:
            if 'data' in str(option):
                files_list[path] = option
    else:
        files_list[path] = files[0]
    # data = pd.read_excel(file)

%%

Destination folder where you want to copy the files

In [None]:
destination_folder = r'C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\surveys\survey files to clean up'

Create the destination folder if it doesn't exist

In [None]:
os.makedirs(destination_folder, exist_ok=True)

In [None]:
didnt_work = []
# Loop through each file path
for k,v in files_list.items():
    
    file_path = os.path.join(k,v)
    print(file_path)
    try:
        # Copy the file to the destination folder
        shutil.copy(file_path, destination_folder)
        print(f'Copied: {file_path} to {destination_folder}')
    except Exception as e:
        print(f'Error copying {file_path}: {e}')
        didnt_work.append(file_path)

%%

In [None]:
file = r"C:\Users\clutz\OneDrive - THE HUNT INSTITUTE\Documents\Data\surveys\survey files to clean up\edited\HLR_2024_eval_results.xlsx"
df = pd.read_excel(file)
print(df.columns)

%%

In [None]:
dfs = []
for col_name,col_data in df.items():
    
    res_list = []
    quest_list = []
    response_list = []
    if col_name == df.columns[0]:
        continue
    n = len(df[df.columns[0]])
    res_list.extend(df[df.columns[0]])
    quest_list.extend([str(col_name)]*n)
    response_list.extend(col_data.to_list())
    df_to_append = pd.DataFrame({"responder":res_list, "question": quest_list , "response": response_list})
    dfs.append(df_to_append)

In [None]:
all_dfs = pd.concat(dfs)

%%

In [None]:
    print('################')
    print(k)
    print(v)
    print('\n')

%%

In [None]:
for m in multiple_files:
    os.chdir(m)

%%

%%

In [None]:
from nameparser import HumanName

In [None]:
def is_name(value):
    try:
        name = HumanName(value)
        # If name parsing did not fail, it might be a name
        return True
    except:
        return False
# %%

In [None]:
multiple_files = []
dfs = []
for i,f in enumerate(survey_update['file_path']):
    # path = survey_update['file_path'].iloc[1]
    # print(path)
    path = f
    break_main_loop = False
    print('###############')
    print(survey_update['event'].iloc[i])
    print(path)
    try:
        os.chdir(path)
    except:
        print('no')
        continue
    files = glob.glob('*.xlsx')
    if len(files) != 1:
        # print(files)
        # for file in files:
        #     file_df = pd.read_excel(file)
        #     print(file_df)
        print("%%%%%")
        print("multiple files")
        print("%%%%%")
        print('\n')
        print('\n')
        print('\n')
        print('\n')
        print('\n')
        multiple_files.append(path)
        continue
    else:
        file = files[0]

In [None]:
    data = pd.read_excel(file)
    data_cl = data.dropna(how = 'all', axis = 1)
    

    ##THIS DOESNT WORK###
    # # Find columns where the substring is found in the column names
    # for name, col_values in data.cl.items():
    #     check_list = col_values.to_list()
    #     name_found = False
    #     for check in check_list:
    #         if is_name(str(check)):
    #             name_column = name
    #             name_found = True
    #             break
    #     if name_found == True:
    #         break
    

In [None]:
    #Looking for Committee member column to get respondents names as first column
    matching_columns = [col for col in data_cl.columns if re.search(r'^[Cc]ommittee [Mm]ember', col)]
    try:
        first_col = matching_columns[0]
    except:
        print("col match method #2")
        # try:
        summary_found = False
        for col_name, col_data in data_cl.items():
            print('***************************************')
            print(col_name)
            print(col_data)
            print('***************************************')
            print(summary_found)
            if summary_found == True:
                first_col = col_name
                print("first column:")
                print(col_name)
                print("______________________________________________________________________________")
                print(col_data)
                print("______________________________________________________________________________")
                break
            col_to_list = col_data.to_list()

In [None]:
            for col_val in col_to_list:
                # print(col_val)
                # if summary_found == True:
                #     name_col = col_name
                #     print("[" + str(name_col) + "]"+ " is what is being matched")
                #     print(col_data)
                #     break
                    
                
                if re.search(r'number of attendees|response rate', str(col_val).lower()):
                    # print(str(col_val))
                    summary_found = True
                    print('col value that matches: ' + str(col_val).lower())
                    # print(col_data)
                    break
            print(data_cl.columns[(len(data_cl.columns)-1)])
            if col == data_cl.columns[(len(data_cl.columns)-1)] and summary_found ==  False:
                print("no summary found")
                break_main_loop = True
    
        for ik, k in enumerate(data_cl[first_col]):
            if 'attendees' in str(k) or 'response rate' in str(k).lower():
                print('THIS ISNT RIGHT') 
                break_main_loop = True
    if break_main_loop == True:
        break

            # print("first column: " + str(start))
            # print(data_cl.loc[:,name_col])
        # except:
        #     print('no matching columns')
        #     # print(data.columns)
        #     # print(data.iloc[:,0])
        #     # print(data.iloc[:,1])
        #     # print(data_cl)
        #     break
    
    if first_col:
        print('CONTINUE')
        print(first_col)
    else:
        print('STOP')
        # break
    data_cl = data_cl.loc[:,first_col:]
    
    #     data_cl = data_cl.loc[:,first_col:]
    #     print("\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
    #     print('new data frame')
    #     print(data_cl)
    #     print("\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
    # except:
    #     print("\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
    #     print("something wrong, df below for reference")
    #     print(data_cl)
    #     print("\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\")
    #     break

In [None]:
    
    for ix,j in enumerate(data_cl[data_cl.columns[0]]):
        # print(j)
        if 'average' in str(j).lower():
            stop = ix-1
            break
    data_cl = data_cl.iloc[:stop,:]
    
    data_cl = data_cl[~data_cl.iloc[:,3].astype(str).str.contains("sessions", case = False, na = False)]

In [None]:
    # data_cl[data_cl.columns[1]] = data_cl[data_cl.columns[1]].astype(int)
    for col, data in data_cl.items():
        # print(col)
        # data  = data_cl["The Financial Reality of NC Community College Students"]
        data_int = data.dropna()
        # print(data)
        # data_int = data_int.astype(int)
        # pd.api.types.is_integer_dtype(data)
        try:
            # Try to convert to int, if successful update the DataFrame
            data_int = data_int.astype(int)
        except ValueError:
            # If conversion fails, convert to string
            data_int = data_int.astype(str)
        data_cl[col] = data_int

In [None]:
    #gathering data to for export file
    
    for col, data in data_cl.items():
        if col == data_cl.columns[0]:
            # print("this worked")
            # print(col)
            # print(data)
            continue
        # if str(col).strip() == "Committee Member":
        #     continue

        # print(data)
        if pd.api.types.is_integer_dtype(data_cl[col]):
            n = len(data)
            data_type = ['quantitative']*n
            event_name = [str(survey_update['event'].iloc[i])]*n
            respondents = data_cl.iloc[0,:].to_list()
            quest = [str(col)]*n
            response = data.to_list()
            
        else:
            n = len(data)
            data_type = ['qualitative']*n
            event_name = [str(survey_update['event'].iloc[i])]*n
            respondents = data_cl.iloc[:,0].to_list()
            quest = [str(col)]*n
            response = data.to_list()
        list_list = [data_type,event_name,respondents,quest,response]
        if all(len(v) == n for v in list_list):
            print('making df')
            df = pd.DataFrame({'event': event_name, 
            'data_type': data_type, 
            'responder_name': respondents,
            'question': quest,
            'response': response})
            dfs.append(df)
            
            # print('%%%%%%%')
            # print('issue with: ' + str(survey_update['event'].iloc[i]))
            # print(path)
            # print('%%%%%%%')
            # print("_______questions________")
            # print(quest)
            # print("_______respondents_______")
            # print(respondents)
            # break_main_loop = True
            # break
        else:
            print('lengths dont match')
            break
        
        
        # event_list.extend(event_name)
        # data_type_list.extend(data_type)
        # responder_list.extend(respondents)
        # question_list.extend(quest)
        # response_list.extend(response)

        # try:
        #     df = pd.DataFrame({'event': event_list, 
        #     'data_type': data_type_list, 
        #     'responder_name': response,
        #     'question': question_list,
        #     'response': response_list})
        # except:
        #     print("event_list" + ": " +str(len(event_list)))
        #     print("data_type_list" + ": " +str(len(data_type_list)))
        #     print("responder_list" + ": " +str(len(responder_list)))
        #     print("question_list" + ": " +str(len(question_list)))
        #     print("response_list" + ": " +str(len(response_list)))

        #     break
        
    if break_main_loop == True:
        break

In [None]:
all_survey_data = pd.concat(dfs)

In [None]:
            
   
        
# %%
    for i,j in enumerate(data_cl['Commitee Member']):
        list = [0,i]
    
    # data_cl.iloc[:,0].to_list()
    print(data_cl.dtypes)
    print(data_cl)
    print('\n')
# print("Columns with missing values:")
# print(data.isnull().sum())
# %%
for d in data.columns:
    print(d)
    
# %%%