# Import Required Libraries
Import the necessary libraries, including pandas and matplotlib.

In [1]:
# Import Required Libraries
import pandas as pd
import matplotlib.pyplot as plt

# Enable inline plotting for matplotlib
%matplotlib inline

# Load Survey Data
Load the survey response data from the CSV files in the `survey_results` folder.

In [4]:
# Load Survey Data
import os

# Folder that holds the survey result files (relative to the notebook's
# working directory)
folder_path = 'survey_results'

# os.listdir() returns names in arbitrary order, so sort them to make the
# load order (and every later per-file loop) reproducible between runs
files = sorted(os.listdir(folder_path))

# Read every CSV file into a DataFrame keyed by its file name; the extension
# check is case-insensitive so files named e.g. "RESULTS.CSV" are not skipped
data_frames = {}
for file in files:
    if file.lower().endswith('.csv'):
        file_path = os.path.join(folder_path, file)
        data_frames[file] = pd.read_csv(file_path)

# Display the loaded DataFrames keyed by file name (an empty dict means no
# CSV files were found in the folder)
data_frames

{}

# Inspect Data
Inspect the first few rows of the data to understand its structure and contents.

In [None]:
# Inspect Data

# Preview the top rows of each loaded DataFrame, labelled with its source
# file, to see which columns and values it contains
for file_name in data_frames:
    df = data_frames[file_name]
    print(f"First few rows of {file_name}:")
    display(df.head())

# Clean Data
Clean the data by handling missing values and correcting data types if necessary.

In [None]:
# Clean Data

# Handle missing values and correct data types if necessary.
# Iterate over a snapshot of the items because every entry is replaced with
# its cleaned copy inside the loop.
for file_name, df in list(data_frames.items()):
    if 'age' in df.columns:
        # Parse 'age' as numeric first: entries that cannot be parsed become
        # NaN and are removed below together with the other missing values
        # (a plain astype(int) would raise ValueError on such entries)
        df = df.assign(age=pd.to_numeric(df['age'], errors='coerce'))

    # Drop rows with any missing values; build a new frame instead of
    # mutating the loaded one in place
    df = df.dropna()

    if 'age' in df.columns:
        # No NaN values remain at this point, so the integer cast cannot fail
        # on missing data (fractional ages are truncated by the cast)
        df = df.astype({'age': int})

    # Store the cleaned DataFrame back in the dictionary
    data_frames[file_name] = df

# Display the cleaned data
for file_name, df in data_frames.items():
    print(f"Cleaned data for {file_name}:")
    display(df.head())

# Analyze Survey Responses
Perform analysis on the survey responses, such as calculating summary statistics and identifying trends.

In [None]:
# Analyze Survey Responses

# Build the describe() summary table of every DataFrame, keyed by file name
summary_statistics = {
    file_name: df.describe() for file_name, df in data_frames.items()
}

# Show each summary table under a heading that names its source file
for file_name in summary_statistics:
    stats = summary_statistics[file_name]
    print(f"Summary statistics for {file_name}:")
    display(stats)

# Identify trends in the data
# Example: histogram of the 'age' values for every file that has that column
for file_name, df in data_frames.items():
    if 'age' not in df.columns:
        continue  # nothing to plot for this file
    plt.figure(figsize=(10, 6))
    plt.hist(df['age'], bins=20, edgecolor='k', alpha=0.7)
    plt.title(f'Age Distribution in {file_name}')
    plt.xlabel('Age')
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.show()

# Example: bar chart of how often each distinct value occurs in a column
# (assumes the survey data has a 'response' column; files without it are skipped)
for file_name, df in data_frames.items():
    if 'response' not in df.columns:
        continue  # nothing to plot for this file
    plt.figure(figsize=(10, 6))
    response_counts = df['response'].value_counts()
    response_counts.plot(kind='bar', edgecolor='k', alpha=0.7)
    plt.title(f'Response Counts in {file_name}')
    plt.xlabel('Response')
    plt.ylabel('Count')
    plt.grid(True)
    plt.show()

# Visualize Results
Create visualizations to represent the survey results using matplotlib and seaborn.

In [None]:
# Visualize Results

import seaborn as sns

# Example: box plot of the 'age' values for every file that has that column
for file_name, df in data_frames.items():
    if 'age' not in df.columns:
        continue  # nothing to plot for this file
    plt.figure(figsize=(10, 6))
    ages = df['age']
    sns.boxplot(x=ages)
    plt.title(f'Age Distribution Box Plot in {file_name}')
    plt.xlabel('Age')
    plt.grid(True)
    plt.show()

# Example: count plot of each distinct value in a column, with the bars
# ordered as value_counts() returns them
# (assumes the survey data has a 'response' column; files without it are skipped)
for file_name, df in data_frames.items():
    if 'response' not in df.columns:
        continue  # nothing to plot for this file
    plt.figure(figsize=(10, 6))
    responses = df['response']
    bar_order = responses.value_counts().index
    sns.countplot(x=responses, order=bar_order)
    plt.title(f'Response Counts in {file_name}')
    plt.xlabel('Response')
    plt.ylabel('Count')
    plt.grid(True)
    plt.show()

# Example: Create a heatmap for correlation matrix if there are numerical columns
for file_name, df in data_frames.items():
    # 'number' selects every numeric dtype rather than only int64/float64, so
    # columns stored as e.g. int32 or float32 (astype(int) can produce int32
    # on some platforms) are not silently left out of the correlation matrix
    numerical_cols = df.select_dtypes(include='number').columns
    # A correlation matrix needs at least two numeric columns to be meaningful
    if len(numerical_cols) > 1:
        plt.figure(figsize=(12, 8))
        correlation_matrix = df[numerical_cols].corr()
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
        plt.title(f'Correlation Matrix Heatmap in {file_name}')
        plt.show()