## Visualization Demonstration

### Set up environment
Import the libraries we will use this semester.

In [None]:
from datascience import *
import matplotlib.pyplot as plt

%matplotlib inline

#### Create table using survey dataset from GitHub

In [None]:
# Read the survey responses from the CSV file into a datascience Table
survey = Table.read_table('Survey_F24.csv')
# Preview only the first 3 rows
survey.show(3)

In [None]:
# Inspect the column labels available in the survey table
survey.labels

## Exploring the data

In [None]:
# Categorical variable: count the survey rows for each distinct 'Gender' value
gender_dist = survey.group('Gender')
# Display the grouped table ('Gender' alongside its 'count')
gender_dist

In [None]:
# Horizontal bar chart of the counts, one bar per distinct 'Gender' value
gender_dist.barh('Gender')

### Fix input errors/discrepancies

In [None]:
# Fold the mistyped 'male ' category (trailing space) into 'male': look up each
# spelling with are.equal_to and add the matching counts together.
male_count = sum(
    gender_dist.where('Gender', are.equal_to(label)).column('count').sum()
    for label in ('male', 'male ')
)

# 'female' has a single spelling handled here, so one lookup is enough.
female_rows = gender_dist.where('Gender', are.equal_to('female'))
female_count = female_rows.column('count').sum()

# Build the cleaned distribution; only these two labels are carried over.
gender_dist_clean = (
    Table()
    .with_column('Gender', ['male', 'female'])
    .with_column('count', [male_count, female_count])
)
gender_dist_clean

In [None]:
# Horizontal bar chart of the cleaned distribution ('male' and 'female' only)
gender_dist_clean.barh('Gender')

In [None]:
# More advanced replace
# Define a function that normalizes a label, e.g. turns 'male ' into 'male'

def clean_gender(value):
    """Return a normalized version of a gender label.

    Every space character is removed (including spaces inside the text),
    any remaining leading/trailing whitespace such as tabs or newlines is
    stripped, and the result is converted to lower case.
    """
    return value.replace(" ", "").strip().lower()

# Normalize every 'Gender' label with clean_gender, merge rows that now share
# a label by summing their counts, and order the result from the largest
# summed count to the smallest.
gender_dist_cleaned = (
    gender_dist
    .with_column('Gender', gender_dist.apply(clean_gender, 'Gender'))
    .group('Gender', sum)
    .sort('count sum', descending=True)
)

# Horizontal bar chart of the merged counts
gender_dist_cleaned.barh('Gender')

## Visualizing data

In [None]:
# Numerical variable: histogram of the 'Units taking' column
survey.hist('Units taking')

In [None]:
# Count the survey rows for each distinct 'Eye color' value
eye_color = survey.group('Eye color')
# Display the grouped table
eye_color

In [None]:
# Horizontal bar chart of the eye-color counts, in the table's current row order
eye_color.barh('Eye color')

In [None]:
# Same chart, with rows sorted from the largest count to the smallest first
eye_color.sort('count', descending = True).barh('Eye color')

In [None]:
# Count the survey rows for each ('Handed', 'Pant leg') combination
handed = survey.group(['Handed', 'Pant leg'])
# Inspect the column labels of the grouped table
handed.labels

In [None]:
# Keep only the rows where 'Handed' is 'left'
l_handed = handed.where('Handed', are.equal_to('left'))
# 'Handed' is the same in every remaining row, so remove it before plotting
l_handed = l_handed.drop('Handed')

# Horizontal bar chart of the counts per 'Pant leg' value
l_handed.barh('Pant leg')
plt.title('Pant Leg Preference for Left Handedness')

In [None]:
# Keep only the rows where 'Handed' is 'right'
r_handed = handed.where('Handed', are.equal_to('right'))
# 'Handed' is the same in every remaining row, so remove it before plotting
r_handed = r_handed.drop('Handed')

# Horizontal bar chart of the counts per 'Pant leg' value
r_handed.barh('Pant leg')
plt.title('Pant Leg Preference for Right Handedness')

In [None]:
# Reshape the counts so each 'Handed' value is a row and each 'Pant leg' value
# is its own column of summed counts, then draw one bar per column for every row.
handed.pivot(
    columns='Pant leg',
    rows='Handed',
    values='count',
    collect=sum,
).barh('Handed')

# Label the chart
plt.title('Pant Leg Preference by Handedness')
plt.ylabel('Handed')
plt.xlabel('Count')
plt.show()