# Introduction to Python (Part 2)

## Create, Manipulate and Summarise Data

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import os
# from google.colab import files #Uncomment if you are using Google Colab
import io
import random
from scipy import stats

---
### Generating Numbers and Sequences

In [None]:
# Draw 20 random integers from 1 to 10
# (`high` is exclusive, so 11 is needed to make 10 reachable)
x1 = np.random.randint(low=1, high=11, size=20)
x1

In [None]:
# Draw 20 random numbers from a Uniform distribution on the interval [5, 15)
x2 = np.random.uniform(low=5, high=15, size=20)
x2

In [None]:
# Draw 20 random numbers from a normal distribution:
# positional arguments are mean (12), standard deviation (3) and sample size (20)
x3 = np.random.normal(12, 3, 20)
x3

---

In [None]:
# A sequence from 1 to 20 with a 0.5 increment:
# 39 evenly spaced points with both end points included
x4 = np.linspace(1, 20, 39)
x4

In [None]:
# Repeat the whole sequence {1,2,3,4,5} five times: 1..5, 1..5, ...
x5 = np.tile(np.arange(1, 6), reps=5)
x5

In [None]:
# Repeat each element of {1,2,3,4,5} five times: 1,1,1,1,1,2,2,...
x6 = np.arange(1, 6).repeat(5)
x6

In [None]:
# Repeat each element of {1,2,3,4,5} three times,
# then repeat the resulting sequence twice
x7 = np.tile(np.arange(1, 6).repeat(3), reps=2)
x7

---

In [None]:
# First 10 lowercase letters as a list of single characters
import string
list(string.ascii_lowercase)[:10]

In [None]:
# First 10 UPPERCASE letters as a list of single characters
list(string.ascii_uppercase)[:10]

---
## Data Import/Export and Visualization

In [None]:
# Uncomment the following if you are using Google Colab
# uploaded = files.upload()  # Interactive upload
# display(Image(next(iter(uploaded))))

In [None]:
# Setting the default display style:
# reset every pandas option whose name starts with 'display.' to its default
pd.reset_option('^display.', silent=True)  # '^' anchors the pattern at the start of the option name

In [None]:
# Load the ElderlyPopWA-1.csv file.
# In Google Colab the file is uploaded interactively; anywhere else it is read
# from the current working directory, so the cell no longer fails with a
# NameError on `files` when the Colab import is not available.
try:
    from google.colab import files  # Only importable inside Google Colab
except ImportError:
    ElderlyPopWA = pd.read_csv('ElderlyPopWA-1.csv')
else:
    uploaded = files.upload()  # Interactive upload; maps file name -> file bytes
    ElderlyPopWA = pd.read_csv(io.BytesIO(uploaded['ElderlyPopWA-1.csv']))

In [None]:
# By default pandas truncates long/wide tables when they are displayed.
# Uncomment the following lines if you want to see all the rows and columns
# pd.set_option('display.max_rows', None)  # Show all rows
# pd.set_option('display.max_columns', None)  # Show all columns
display(ElderlyPopWA) # Using the notebook's display function to view the data

In [None]:
ElderlyPopWA.head(n=5)  # Show only the first five rows (the default for head)

In [None]:
# Export DataFrame to CSV
# index=False keeps the row index (row numbers) out of the written file
ElderlyPopWA.to_csv(path_or_buf='ElderlyPopWA_updated.csv', index=False)

---
## Data Manipulation and Summaries

In [None]:
# Create BMI categories for the elderly female participants
# (Assumes ElderlyPopWA DataFrame is loaded)
# Bins are right-closed: (0, 23] -> Underweight, (23, 31] -> Healthy_Weight,
# (31, inf) -> Overweight.
mBMI = ElderlyPopWA['BMI'].max()  # Largest observed BMI, kept for reference
# The last bin is open-ended instead of ending at mBMI: with the sample maximum
# as the final edge, pd.cut raises ValueError whenever max BMI <= 31 because
# the bin edges are then no longer strictly increasing.
ElderlyPopWA['BMI_class'] = pd.cut(ElderlyPopWA['BMI'],
     bins=[0, 23, 31, np.inf],
     labels=['Underweight','Healthy_Weight','Overweight'])
ElderlyPopWA

In [None]:
# Subsetting by a categorical variable:
# keep only the rows whose BMI class is 'Underweight'
Underweight = ElderlyPopWA.loc[
    ElderlyPopWA['BMI_class'] == 'Underweight'
]
Underweight

In [None]:
# Subsetting by a categorical variable, matching more than one value:
# keep the rows whose BMI class is either 'Underweight' or 'Overweight'
Unhealthy_weight = ElderlyPopWA.loc[
    ElderlyPopWA['BMI_class'].isin(['Underweight','Overweight'])
]
Unhealthy_weight

In [None]:
# Select a subset:
# rows for participants under 75 years of age, with every column kept
Age_LT75 = ElderlyPopWA.loc[ElderlyPopWA['Age'] < 75, :]
Age_LT75

In [None]:
# Select a subset:
# rows for participants under 75 years of age, keeping only a few columns
# (use ":" in place of ['BMI', 'Waist'] to keep every column)
Age_LT75 = ElderlyPopWA.loc[
    ElderlyPopWA['Age'] < 75,
    ['BMI', 'Waist'],
]
Age_LT75

In [None]:
# Select a subset by negating a condition with the "~" operator:
# keep the rows that are NOT under 75 years of age
Age_GTE75 = ElderlyPopWA.loc[
    ~(ElderlyPopWA['Age'] < 75)
]
Age_GTE75

---
## Data Statistics and Analysis

### Measure the centre

In [None]:
# Arithmetic mean of the Age column
ElderlyPopWA['Age'].mean()

In [None]:
# Median of the Age column
ElderlyPopWA['Age'].median()

### Measure the Spread

In [None]:
# Standard deviation of the Age column (pandas uses the sample formula, ddof=1, by default)
ElderlyPopWA['Age'].std()

In [None]:
# Smallest and largest Age, returned together as a (min, max) tuple
ElderlyPopWA['Age'].min(), ElderlyPopWA['Age'].max()

In [None]:
# Quartiles of Age: 25th, 50th (median) and 75th percentiles
ElderlyPopWA['Age'].quantile([0.25,0.5,0.75])

In [None]:
# Five-number summary of Age with NumPy: minimum, quartiles and maximum
# (np.percentile takes percentages on a 0-100 scale)
np.percentile(ElderlyPopWA['Age'], [0,25,50,75,100])

### Measure the Shape

In [None]:
# Skewness of Age (requires scipy.stats)
stats.skew(ElderlyPopWA['Age'])

In [None]:
# Kurtosis of Age (requires scipy.stats)
# scipy's default is the Fisher (excess) definition, for which a normal distribution scores 0
stats.kurtosis(ElderlyPopWA['Age'])

#### Column-wise operations

In [None]:
# Column means for the columns at positions 1 to 7 (the 2nd to 8th columns)
ElderlyPopWA.iloc[:, 1:8].mean(axis=0)

In [None]:
# Apply np.mean to every column: axis=0 works column-wise,
# whereas axis=1 would work row-wise
(
    ElderlyPopWA
    .iloc[:, 1:8]
    .apply(np.mean, axis=0)
)

In [None]:
# Apply std column-wise using axis=0, axis=1 implies row-wise
# NOTE(review): np.std defaults to ddof=0 whereas Series.std() defaults to ddof=1,
# so these values may differ from the .std() results — confirm which is intended
ElderlyPopWA.iloc[:,1:8].apply(np.std, axis=0)

#### Frequency and proportion tables

In [None]:
# Frequency table: number of participants in each BMI class
BMI_freq = ElderlyPopWA['BMI_class'].value_counts()
BMI_freq

In [None]:
# Proportion table: share of the sample falling in each BMI class
BMI_prop = ElderlyPopWA['BMI_class'].value_counts(normalize=True)
BMI_prop

In [None]:
# Create another categorical variable, i.e. age group
# Left-closed bins [0, 75) and [75, inf) follow the same `Age < 75` rule used
# when subsetting by age, so no 74.99 cut-point approximation is needed and
# ages above 100 are no longer turned into NaN.
ElderlyPopWA['Age_grp'] = pd.cut(ElderlyPopWA['Age'], [0, 75, np.inf], right=False,
                                 labels=['<75years','75+years'])
# Cross-tabulate counts: rows are age groups, columns are BMI classes
tab = pd.crosstab(ElderlyPopWA['Age_grp'], ElderlyPopWA['BMI_class'])
tab

In [None]:
# Row proportions: each count divided by its row (age group) total,
# i.e. the distribution of BMI classes within each age group
tab.div(tab.sum(axis=1), axis=0)  # Proportions by row

In [None]:
# Column proportions: each count divided by its column (BMI class) total,
# i.e. the distribution of age groups within each BMI class
tab.div(tab.sum(axis=0), axis=1)  # Proportions by column

In [None]:
# Proportions relative to the overall sample size (grand total of the table)
tab / tab.to_numpy().sum()

In [None]:
# Summarise waist circumference by BMI class: mean within each class
(
    ElderlyPopWA
    .groupby('BMI_class', observed=True)['Waist']
    .mean()
)

In [None]:
# Standard deviation of waist circumference within each BMI class
(
    ElderlyPopWA
    .groupby('BMI_class', observed=True)['Waist']
    .std()
)

In [None]:
# Summarise waist circumference by BMI class and age group:
# mean within each (BMI class, age group) combination
(
    ElderlyPopWA
    .groupby(['BMI_class','Age_grp'], observed=True)['Waist']
    .mean()
)

In [None]:
# Standard deviation of waist circumference within each
# (BMI class, age group) combination
(
    ElderlyPopWA
    .groupby(['BMI_class','Age_grp'], observed=True)['Waist']
    .std()
)

In [None]:
# Mean waist circumference by BMI class, this time passing np.mean to apply
(
    ElderlyPopWA
    .groupby('BMI_class', observed=True)['Waist']
    .apply(np.mean)
)

In [None]:
# Mean waist circumference by BMI class and age group, using apply
(
    ElderlyPopWA
    .groupby(['BMI_class','Age_grp'], observed=True)['Waist']
    .apply(np.mean)
)

In [None]:
# Round the BMI class proportions to 3 decimal places with NumPy
np.round(
    ElderlyPopWA['BMI_class'].value_counts(normalize=True),
    3,
)

In [None]:
# Chained operations with pandas: proportions first, then rounding to 3 dp
(
    ElderlyPopWA['BMI_class']
    .value_counts(normalize=True)
    .round(3)
)

---
### Functions in Python

In [None]:
# Function definition and usage in Python
def Greeting():
    """Print a fixed greeting line to standard output."""
    message = "Hello! My name is XXXX"
    print(message)

In [None]:
# No arguments are needed: the function simply prints its greeting
Greeting() # Calling a function

In [None]:
def add3(x):
    """Return ``x`` increased by 3."""
    result = x + 3
    return result

In [None]:
# 5 is bound to the parameter x, so the result is 5 + 3 = 8
add3(5) # Passing parameters

In [None]:
# Same function, different argument: 10 + 3 = 13
add3(10)

In [None]:
# Same function, different argument: 15 + 3 = 18
add3(15)

In [None]:
def add_mult(x, y):
    """Return the sum, product and ratio of ``x`` and ``y`` as a tuple.

    The three values are returned in the order ``(x + y, x * y, x / y)``.
    The ratio uses true division, so plain Python numbers raise
    ``ZeroDivisionError`` when ``y`` is 0.
    """
    return x + y, x * y, x / y

In [None]:
# Store the arguments in variables first, then print the returned tuple
x, y = 3, 4
print(add_mult(x, y))

In [None]:
# Keyword arguments: each value is matched to its parameter by name
add_mult(x=3, y=4)

In [None]:
# Keyword arguments may be given in any order; the result is the same as above
add_mult(y=4, x=3)

In [None]:
# Positional arguments: matched to x and y by position
add_mult(3, 4)

---
### Control Structures *(IF ELSE Statements)*

In [None]:
# If statement: check if x is odd
# (a non-zero remainder after dividing by 2 is truthy)
x = 5
if x % 2:
    print(f'{x} is an odd integer')

In [None]:
# If-else statement: check if x is even or odd
x = 10
if x % 2 == 0:
    print(f'{x} is an even integer')
else:
    print(f'{x} is an odd integer')

In [None]:
# Chained if-elif-else for grading
# Scores outside the 0-100 range are rejected first; the remaining
# branches then only need an upper bound each.
score = 65
if not (0 <= score <= 100):
    print('Invalid score!')
elif score < 50:
    print('Your final grade is N.')
elif score < 60:
    print('Your final grade is P.')
elif score < 70:
    print('Your final grade is C.')
elif score < 80:
    print('Your final grade is D.')
else:
    print('Your final grade is HD.')

In [None]:
# For loop from 1 to 10
# range(1, 11) stops before 11, so the last value printed is 10
for I in range(1, 11):
    print(I)

In [None]:
# For loop to print mean of columns 2 to 8 (assumes ElderlyPopWA is loaded)
# range(1, 8) yields the zero-based positions 1..7, i.e. the 2nd to 8th columns
dat = ElderlyPopWA.copy()  # Work on a copy of the DataFrame
for J in range(1, 8):
    print(dat.iloc[:, J].mean().round(3))  # Column mean rounded to 3 decimals

In [None]:
# For loop: print numbers divisible by 7 from 1 to 100
for I in range(1, 101):
    if I % 7:
        continue  # Non-zero remainder: not a multiple of 7
    print(I)

In [None]:
# For loop: print first 10 numbers divisible by 13 between 500 and 800
count = 0
for I in range(500, 801):
    if I % 13 != 0:
        continue  # Skip anything that is not a multiple of 13
    print(I)
    count += 1
    if count == 10:
        break  # Ten multiples printed, so stop early