In [7]:
#!/usr/bin/env python3

import pandas as pd
import pyreadstat
# import openpyxl
from openpyxl import load_workbook

# Read the SPSS .sav file: `data` is the dataframe, `meta` the file metadata
data, meta = pyreadstat.read_sav(
    "../../SPSS-Python/spss-datasets/Mah_CSAT_practice.sav"
)

# Apply the value labels stored in the metadata to the dataframe
df_copy = pyreadstat.set_value_labels(data, meta)

# Variables whose responses are pooled into a single frequency table
values_list = ['Q4B_1', 'Q4B_2', 'Q4B_3', 'Q4B_4']

# Excel workbook that receives the result. It is opened in append mode
# further down, so the file has to exist already.
excel_file = '../Mah_CSAT/grouped_frequencies.xlsx'

# Keep only the requested variables that are present in the dataset,
# in the order the dataset lists them
selected_variables = [
    variable for variable in meta.column_names if variable in values_list
]
if not selected_variables:
    # pd.concat would raise ValueError("No objects to concatenate") anyway;
    # fail with the same exception type but a message that names the cause.
    raise ValueError(f"None of {values_list} were found in the dataset")

# Stack the selected columns end to end into one Series and drop the
# missing responses
combined_df = pd.concat(
    [df_copy[selected_var] for selected_var in selected_variables],
    axis=0,
    ignore_index=True,
).dropna(axis=0)

# Raw number of responses per value
raw_counts = combined_df.value_counts(normalize=False).sort_index()
print(f"{raw_counts}")

# Share of responses per value, and the number of non-missing responses.
# NOTE: `value_counts` holds proportions from here on (normalize=True).
value_counts = combined_df.value_counts(normalize=True).sort_index()
total_counts = raw_counts.sum()

print(f"{value_counts}\nTotal counts: {total_counts}")

# Build the table to save. The two Series share no index labels, so the
# axis=1 concat aligns them on the union of both indexes: each response row
# has its proportion in the first column, and the extra "Total counts" row
# has the total in the second column. All other cells are NaN.
result = pd.concat([value_counts, pd.Series(total_counts, index=["Total counts"])], axis=1)
result.columns = ["Value counts", "Total counts"]

# The worksheet is named after the first variable in values_list
worksheet_name = values_list[0]

# Append to the existing workbook. pd.ExcelWriter opens the file itself, so
# no separate load_workbook() call is needed. 'overlay' writes on top of an
# existing sheet without clearing it first, so rows left by an earlier,
# longer result are not removed.
with pd.ExcelWriter(excel_file, if_sheet_exists='overlay', engine="openpyxl", mode="a") as writer:
    result.to_excel(writer, sheet_name=worksheet_name, index_label="Variable")


KeyboardInterrupt: 

#### Break this script into functions

In [19]:
#!/usr/bin/env python3

import pandas as pd
import pyreadstat
from openpyxl import load_workbook

def concat_variables(selected_variables, concat_direction=0, source_df=None):
    """Combine several columns of a dataframe and drop missing entries.

    Args:
        selected_variables: Column names to pull from the source dataframe.
        concat_direction: Axis handed to ``pd.concat``. ``0`` (default)
            stacks the columns end to end into a single Series; ``1`` places
            them side by side in a DataFrame.
        source_df: Dataframe to read the columns from. When omitted, the
            notebook-level ``df_copy`` is used, so ``df_copy`` must be
            defined before the call.

    Returns:
        The concatenated data with missing entries removed. Labels along the
        concatenation axis are replaced by integers (``ignore_index=True``),
        so with ``concat_direction=1`` the original column names are not
        kept, and every row containing a missing value is dropped.

    Raises:
        ValueError: Raised by ``pd.concat`` when ``selected_variables`` is
            empty.
    """
    if source_df is None:
        # Backward-compatible fallback: earlier callers rely on the global.
        source_df = df_copy

    columns = [source_df[name] for name in selected_variables]
    combined = pd.concat(columns, axis=concat_direction, ignore_index=True)
    return combined.dropna(axis=0)


# Read the SPSS .sav file: `data` is the dataframe, `meta` the file metadata
data, meta = pyreadstat.read_sav(
    "../../SPSS-Python/spss-datasets/Mah_CSAT_practice.sav"
)

# Apply the value labels stored in the metadata to the dataframe.
# concat_variables() reads this dataframe through the name `df_copy`.
df_copy = pyreadstat.set_value_labels(data, meta)

# Variables whose responses are pooled into a single frequency table
values_list = ['Q4B_1', 'Q4B_2', 'Q4B_3', 'Q4B_4']

# Excel workbook that receives the result. It is opened in append mode
# further down, so the file has to exist already.
excel_file = '../Mah_CSAT/grouped_frequencies.xlsx'

# Keep only the requested variables that are present in the dataset,
# in the order the dataset lists them
selected_variables = [
    variable for variable in meta.column_names if variable in values_list
]
print(f'selected_variables:\n {selected_variables}\n')
if not selected_variables:
    # pd.concat would raise ValueError("No objects to concatenate") anyway;
    # fail with the same exception type but a message that names the cause.
    raise ValueError(f"None of {values_list} were found in the dataset")

# Capture the helper's return value: without the assignment, `combined_df`
# below is either undefined (fresh kernel) or a stale value from another cell.
# Axis 0 pools all responses into one Series, which is what the frequency
# table below is built from; axis 1 would instead keep only rows where every
# selected variable is answered and count combinations of answers.
combined_df = concat_variables(selected_variables, 0)

# Raw number of responses per value
raw_counts = combined_df.value_counts(normalize=False).sort_index()
print(f"{raw_counts}")

# Share of responses per value, and the number of non-missing responses.
# NOTE: `value_counts` holds proportions from here on (normalize=True).
value_counts = combined_df.value_counts(normalize=True).sort_index()
total_counts = raw_counts.sum()

print(f"{value_counts}\nTotal counts: {total_counts}")

# Build the table to save. The two Series share no index labels, so the
# axis=1 concat aligns them on the union of both indexes: each response row
# has its proportion in the first column, and the extra "Total counts" row
# has the total in the second column. All other cells are NaN.
result = pd.concat([value_counts, pd.Series(total_counts, index=["Total counts"])], axis=1)
result.columns = ["Value counts", "Total counts"]

# The worksheet is named after the first variable in values_list
worksheet_name = values_list[0]

# Append to the existing workbook. pd.ExcelWriter opens the file itself, so
# no separate load_workbook() call is needed. 'overlay' writes on top of an
# existing sheet without clearing it first, so rows left by an earlier,
# longer result are not removed.
with pd.ExcelWriter(excel_file, if_sheet_exists='overlay', engine="openpyxl", mode="a") as writer:
    result.to_excel(writer, sheet_name=worksheet_name, index_label="Variable")


selected_variables:
 ['Q4B_1', 'Q4B_2', 'Q4B_3', 'Q4B_4']

All other                                                                      28
Convenience or comfort of use issues                                            1
Hard to start, wouldn't start, shut off                                         7
Issue with gears                                                                1
Issue with lights, gauges, panels, latches                                      4
Issues with clutch, transmission                                                6
Issues with fuel system, fuel tanks                                             1
Issues with hydraulics                                                         22
Issues with seat                                                                3
Issues with tires                                                               5
Issues with wiring, electrical                                                  2
Leaking                                