In [None]:
# Don
# Dr. Ofori
# DSC-510 Python Proficiency with statistics
#September 2023

In [1]:
import pandas as pd
import random

In [20]:
# Generate synthetic data
data = {
    'Participant ID': [random.randint(100, 999) for _ in range(100)],
    'Exercise group': [random.choice(["jogging", "weightlifting", "yoga"]) for _ in range(100)],
    'Pre-exercise systolic blood pressure': [random.randint(95, 145) for _ in range(100)],
    'Post-exercise systolic blood pressure': [random.randint(90, 150) for _ in range(100)],
}

# Create a DataFrame
df = pd.DataFrame(data)

# Save the DataFrame as a CSV file
df.to_csv('exercise_data.csv', index=False)

In [21]:
#Computational Aspect: Synthetic data is generated using Python's random module to emulate the characteristics
#of the study. The data includes participant IDs, exercise groups, and pre- and post-exercise blood pressure measurements.

#Statistical Aspect: There is no direct statistical analysis involved in this data generation.
#This task establishes the groundwork for future statistical analysis, as it provides data for the subsequent analysis, 
#ensuring that the dataset matches the study's description.

In [22]:
def count_vowels(exercise_group):
    vowels = "aeiouAEIOU"
    return sum(1 for char in exercise_group if char in vowels)

# Example:
exercise_group = "weightlifting"
vowel_count = count_vowels(exercise_group)
print(f"Number of vowels in {exercise_group}: {vowel_count}")

Number of vowels in weightlifting: 4


In [23]:
#Computational Aspect: The function count_vowels takes an exercise group name 
#as the input and uses a Python loop to count the vowels in the given string.

#Statistical Aspect: This task can be useful for text processing or summarizing group names.

In [24]:
df = pd.read_csv('exercise_data.csv')

for group in df['Exercise group'].unique():
    max_pre_bp = df[df['Exercise group'] == group]['Pre-exercise systolic blood pressure'].max()
    participant = df[(df['Exercise group'] == group) & (df['Pre-exercise systolic blood pressure'] == max_pre_bp)]
    print(f"Highest pre-exercise BP in {group}:")
    print(participant)

Highest pre-exercise BP in yoga:
    Participant ID Exercise group  Pre-exercise systolic blood pressure  \
17             277           yoga                                   144   
98             617           yoga                                   144   

    Post-exercise systolic blood pressure  
17                                    139  
98                                    118  
Highest pre-exercise BP in weightlifting:
    Participant ID Exercise group  Pre-exercise systolic blood pressure  \
65             478  weightlifting                                   142   

    Post-exercise systolic blood pressure  
65                                     93  
Highest pre-exercise BP in jogging:
    Participant ID Exercise group  Pre-exercise systolic blood pressure  \
18             141        jogging                                   145   

    Post-exercise systolic blood pressure  
18                                    118  


In [25]:
# Computational Aspect: This task involves reading the CSV data into a DataFrame using pandas. 
# Then, it uses pandas' functionality to filter and find the participant with the highest
# pre-exercise systolic blood pressure in each exercise group.

# Statistical Aspect: This task identifies the participant with the highest pre-exercise blood pressure in each group, 
# which can be valuable for understanding the range and distribution of pre-exercise blood pressures within each group.

In [26]:
def extract_even_ids(participant_ids):
    return [id for id in participant_ids if id % 2 == 0]

# Example usage:
participant_ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
even_ids = extract_even_ids(participant_ids)
print("Even Participant IDs:", even_ids)

Even Participant IDs: [2, 4, 6, 8, 10]


In [27]:
# Read the CSV data into a DataFrame
df = pd.read_csv('exercise_data.csv')

# Extract the "Participant ID" column
participant_ids = df['Participant ID']

# Define the extract_even_ids function
def extract_even_ids(participant_ids):
    return [id for id in participant_ids if id % 2 == 0]

# Apply the extract_even_ids function
even_ids = extract_even_ids(participant_ids)

# Print the even participant IDs
print("Even Participant IDs:", even_ids)

Even Participant IDs: [264, 720, 308, 496, 762, 132, 592, 260, 448, 968, 258, 814, 508, 728, 600, 976, 208, 258, 808, 152, 980, 780, 310, 110, 900, 916, 526, 208, 400, 320, 152, 418, 434, 478, 334, 434, 492, 500, 204, 332, 684, 938, 900]


In [28]:
# Computational Aspect: The extract_even_ids function iterates through a list of participant IDs, 
# checking each ID to see if it is even. If an ID is even (divisible by 2), it's included in the output list.

# Statistical Aspect: Although this task involves primarily data processing. It enables researchers to create 
# specific subsets for potential statistical analysis, assisting in the exploration of patterns or differences
# among even-numbered participants.

In [29]:
# Read the CSV data into a DataFrame
#df = pd.read_csv('exercise_data.csv')

for group in df['Exercise group'].unique():
    group_df = df[df['Exercise group'] == group]
    
    # Calculate the average monthly blood pressure change and round to two decimal places
    avg_change = round((group_df['Post-exercise systolic blood pressure'] - 
                        group_df['Pre-exercise systolic blood pressure']).mean(), 3)
    
    print(f"Average monthly blood pressure change for {group}: {avg_change}")

Average monthly blood pressure change for yoga: -3.897
Average monthly blood pressure change for weightlifting: -1.757
Average monthly blood pressure change for jogging: -1.706


In [30]:
# Computational Aspect: The task reads the CSV data and uses pandas to group data by exercise group. 
# Then, it calculates the average monthly change in blood pressure for each group by subtracting 
# pre-exercise from post-exercise measurements.

# Statistical Aspect: This task provides insight into the average changes in blood pressure over time 
# for each exercise group, helping assess the effectiveness of the exercise regimens in reducing blood pressure.

In [31]:
def compare_blood_pressure(pre_bp, post_bp):
    return [post - pre for pre, post in zip(pre_bp, post_bp)]

# Example usage:
pre_bp = [120, 130, 140]
post_bp = [110, 125, 135]
differences = compare_blood_pressure(pre_bp, post_bp)
print("Blood Pressure Differences:", differences)

Blood Pressure Differences: [-10, -5, -5]


In [32]:
# Computational Aspect: Using numpy, the function compare_blood_pressure calculates 
# the differences between pre- and post-exercise blood pressure.

# Statistical Aspect: This task computes individual blood pressure changes from the lists and 
# analyzes the effectiveness of the exercise regimens. These differences can be used for further 
# statistical analysis, such as calculating means and standard deviations.

In [33]:
df = pd.read_csv('exercise_data.csv')

# Group by 'Exercise group' and calculate the sum of 'Pre-exercise systolic blood pressure' and 'Post-exercise systolic blood pressure'
group_reduction = df.groupby('Exercise group')[['Pre-exercise systolic blood pressure', 'Post-exercise systolic blood pressure']].sum()

# Calculate the total blood pressure reduction for each exercise group
group_reduction['Total Reduction'] = group_reduction['Pre-exercise systolic blood pressure'] - group_reduction['Post-exercise systolic blood pressure']

print("Total Blood Pressure Reduction by Exercise Group:")
print(group_reduction)

Total Blood Pressure Reduction by Exercise Group:
                Pre-exercise systolic blood pressure  \
Exercise group                                         
jogging                                         4135   
weightlifting                                   4341   
yoga                                            3587   

                Post-exercise systolic blood pressure  Total Reduction  
Exercise group                                                          
jogging                                          4077               58  
weightlifting                                    4276               65  
yoga                                             3474              113  


In [34]:
# Computational Aspect: This task uses pandas to group data by exercise group 
# then, calculates the total blood pressure reduction by subtracting the sum of post-exercise blood pressures 
# from the sum of pre-exercise blood pressures.

# Statistical Aspect: The task provides a summary of the total blood pressure reduction for each exercise 
# group, allowing the researcher to compare the effectiveness of different exercise regimens in reducing blood pressure.