# Introduction to Python (Part 1)
## Fundamentals of Programming in Python
---

NumPy is a foundational Python library for numerical computing.
Provides: Fast multi-dimensional arrays (ndarray),
Mathematical operations (linear algebra, statistics, etc.),
Efficient data storage and vectorized operations.

Pandas is built on NumPy for data manipulation and analysis.
Provides: DataFrames (tabular data) and Series (1D arrays),
Tools for cleaning, transforming, and analyzing structured data,
Time series handling, I/O (CSV/Excel), and merging datasets.

In [63]:
# Import necessary libraries

import numpy as np
import pandas as pd

---
### Variable Assignment

In [65]:
# Assign a value to x
x = 35

In [67]:
# Display x
x

35

In [69]:
# Assign a new value to x and display
x = 18
x

18

In [71]:
# y is x + 8
y = x + 8
y

26

In [73]:
# y is 3*x + 5
y = 3 * x + 5
y

59

---

In [76]:
# Compute the area of a circle starting from its diameter
diam = 10  # diameter, in cm
radius = diam / 2  # half the diameter
area_circle = np.pi * (radius * radius)  # area = pi * r^2
area_circle

78.53981633974483

---

### Data Types

#### Strings

In [81]:
# String examples
'a'

'a'

In [83]:
'abc123'

'abc123'

In [85]:
'apples'

'apples'

In [87]:
'I hate apples'

'I hate apples'

In [93]:
# Comma-separated strings in a single expression create a tuple
'a', 'abc' , 'apples', 'I hate apples'

('a', 'abc', 'apples', 'I hate apples')

#### Numeric Variables

#### Integer (whole numbers): int

In [106]:
5 # Positive whole number as constant

5

In [110]:
-3 # Negative whole number as constant

-3

In [114]:
x = 5 # Assigning a whole number makes x an int by default
x

5

In [116]:
type(x) # Confirming the data type

int

---
#### Floating-point numbers: float

In [151]:
5.5 # Positive floating-point constant

5.5

In [153]:
-2.75 # Negative floating-point constant

-2.75

In [155]:
np.pi

3.141592653589793

---

In [158]:
# Basic arithmetic operations
5.5 + 2.7  # Addition

8.2

In [160]:
5.5 - 2.7  # Subtraction

2.8

In [162]:
y = 5.5 / 2; y  # Division

2.75

In [164]:
type(y) # Note that float and int were involved in the operation

float

In [166]:
5.5 * 2  # Multiplication

11.0

In [168]:
5.5 ** 2  # Squaring

30.25

In [170]:
5.5 ** 4  # To the power of 4

915.0625

In [172]:
np.sqrt(5.5)  # Square root

2.345207879911715

In [174]:
np.exp(5.5)  # Exponential

244.69193226422038

In [176]:
np.log(5.5)  # Natural log

1.7047480922384253

---
#### Logical Operators (Boolean)

In [179]:
# Logical comparisons
5 == 5  # Does 5 equal 5?

True

In [181]:
5 == 2  # Does 5 equal 2?

False

In [183]:
5 != 2  # Does 5 not equal 2?

True

In [185]:
5 > 2  # Is 5 greater than 2?

True

In [187]:
5 >= 2  # Is 5 greater than or equal to 2?

True

In [189]:
5 < 2  # Is 5 less than 2?

False

In [191]:
5 <= 2  # Is 5 less than or equal to 2?

False

In [193]:
# Logical operations
(5 > 2) and (5 > 4)

True

In [195]:
(5 > 2) and (5 < 4)

False

In [197]:
(5 > 2) or (5 < 4)

True

---
### Data Structures

#### Creating sequences

In [202]:
[]  # A null (empty) list

[]

In [204]:
a = [1, 2, 3]  # A numeric list
a

[1, 2, 3]

In [206]:
['a', 'b', 'c']  # A character list

['a', 'b', 'c']

In [208]:
[True, False, False]  # A logical list

[True, False, False]

In [210]:
# Create a list from 1 to 5
a = list(range(1, 6))
a

[1, 2, 3, 4, 5]

In [212]:
# Accessing elements in a list (Python is 0-indexed)
a[1]  # 2nd element

2

In [214]:
a[2]  # 3rd element

3

In [216]:
a[2:5]  # 3rd to 5th element

[3, 4, 5]

In [218]:
[a[1], a[4]]  # 2nd and 5th elements

[2, 5]

In [222]:
z = [1 , 'a', True]; z # list of different data types

[1, 'a', True]

In [224]:
type(z)

list

In [232]:
# Create a new list b containing the integers 6, 7, 8, 9, 10
b = list(range(6, 11))
b

[6, 7, 8, 9, 10]

In [234]:
# Combine lists a and b
c = a + b
c

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [236]:
# Element-wise operations using numpy arrays
a_np = np.array(a)
b_np = np.array(b)
a_np * 0.25  # Multiply elements of a by 0.25

array([0.25, 0.5 , 0.75, 1.  , 1.25])

In [238]:
a_np + b_np  # Add elements of a and b

array([ 7,  9, 11, 13, 15])

In [240]:
b_np - a_np  # Subtract elements of a from b

array([5, 5, 5, 5, 5])

In [242]:
a_np * b_np  # Multiply elements of a and b

array([ 6, 14, 24, 36, 50])

In [244]:
b_np / a_np  # Divide elements of b by elements of a

array([6.        , 3.5       , 2.66666667, 2.25      , 2.        ])

---
#### Categorical Data in Python using Pandas

In [246]:
f1 = pd.Categorical([1, 2, 3, 4, 5])
f1

[1, 2, 3, 4, 5]
Categories (5, int64): [1, 2, 3, 4, 5]

In [248]:
f2 = pd.Categorical(['Male', 'Female', 'Female', 'Male', 'Female'])
f2

['Male', 'Female', 'Female', 'Male', 'Female']
Categories (2, object): ['Female', 'Male']

In [250]:
f3 = pd.Categorical(['L', 'M', 'H', 'H', 'M', 'L'])
f3

['L', 'M', 'H', 'H', 'M', 'L']
Categories (3, object): ['H', 'L', 'M']

In [252]:
# Reorder the levels of f3
f3 = pd.Categorical(f3, categories=['L', 'M', 'H'], ordered=True)
f3

['L', 'M', 'H', 'H', 'M', 'L']
Categories (3, object): ['L' < 'M' < 'H']

---
#### Matrix Manipulation

In [256]:
# Create a 3x3 matrix, filled column-wise (order='F'; NumPy's default is row-wise, order='C')
Mat_A = np.arange(1, 10).reshape((3, 3), order='F')
Mat_A

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [258]:
# Create a 3x3 matrix, filled row-wise
Mat_B = np.arange(1, 10).reshape((3, 3), order='C')
Mat_B

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [260]:
# Create a 3x3 matrix, column-wise (explicit)
Mat_A = np.arange(1, 10).reshape((3, 3), order='F')
Mat_A

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [262]:
# Bind vectors as columns and rows
v1 = np.array([1, 2, 3])  # Vector 1
v2 = np.array([4, 5, 6])  # Vector 2
v3 = np.array([7, 8, 9])  # Vector 3
Mat_A = np.column_stack((v1, v2, v3))  # as columns
Mat_A

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [264]:
# Stack the vectors as rows with np.vstack; np.row_stack was only an alias
# for it and is deprecated as of NumPy 2.0.
Mat_B = np.vstack((v1, v2, v3))  # as rows
Mat_B

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [266]:
# Element-wise multiplication
Mat_A * Mat_B

array([[ 1,  8, 21],
       [ 8, 25, 48],
       [21, 48, 81]])

In [272]:
# Element-wise multiplication is commutative: Mat_B * Mat_A gives the same result as Mat_A * Mat_B
Mat_B * Mat_A

array([[ 1,  8, 21],
       [ 8, 25, 48],
       [21, 48, 81]])

In [268]:
# Matrix multiplication
Mat_A @ Mat_B  # A matrix multiply B

array([[ 66,  78,  90],
       [ 78,  93, 108],
       [ 90, 108, 126]])

In [274]:
Mat_B @ Mat_A  # B matrix multiply A

array([[ 14,  32,  50],
       [ 32,  77, 122],
       [ 50, 122, 194]])

In [282]:
# Accessing elements in a matrix (Python is 0-indexed)
Mat_A[1, 2]  # element in row 2, column 3

8

In [284]:
Mat_A[0:2, 2]  # rows 1 and 2, column 3

array([7, 8])

In [286]:
Mat_A[0, 1:3]  # row 1, columns 2 and 3

array([4, 7])

In [288]:
Mat_A[0, :]  # all elements in row 1

array([1, 4, 7])

In [290]:
Mat_A[:, 2]  # all elements in column 3

array([7, 8, 9])

In [292]:
Mat_A[:, [0, 2]]  # all elements in columns 1 and 3

array([[1, 7],
       [2, 8],
       [3, 9]])

In [294]:
Mat_A[:, 1:]  # all data except column 1

array([[4, 7],
       [5, 8],
       [6, 9]])

---
#### DataFrame

In [302]:
# Create vectors for a DataFrame
Name = ['John', 'Sarah', 'Zach', 'Beth', 'Lachlan']  # Name - Character vector
Age = [35, 28, 33, 55, 43]  # Age - Numeric vector
Gender = pd.Categorical(['Male', 'Female', 'Male', 'Female', 'Male'])  # Gender - factor

In [304]:
# Create a DataFrame
df = pd.DataFrame({'Name': Name, 'Age': Age, 'Gender': Gender})
df

Unnamed: 0,Name,Age,Gender
0,John,35,Male
1,Sarah,28,Female
2,Zach,33,Male
3,Beth,55,Female
4,Lachlan,43,Male


In [306]:
# Add new column to the DataFrame (Method 1)
Coffee_Drinker = [True, True, False, True, False]  # Drinks coffee? - logical vector
df1 = df.copy()
df1['Coffee_Drinker'] = Coffee_Drinker
df1

Unnamed: 0,Name,Age,Gender,Coffee_Drinker
0,John,35,Male,True
1,Sarah,28,Female,True
2,Zach,33,Male,False
3,Beth,55,Female,True
4,Lachlan,43,Male,False


In [308]:
# Add new column to the DataFrame (Method 2)
df2 = pd.concat([df, pd.Series(Coffee_Drinker, name='Coffee_Drinker')], axis=1)
df2

Unnamed: 0,Name,Age,Gender,Coffee_Drinker
0,John,35,Male,True
1,Sarah,28,Female,True
2,Zach,33,Male,False
3,Beth,55,Female,True
4,Lachlan,43,Male,False


In [310]:
# Accessing rows and columns in DataFrame
df.iloc[[0], 0:3]  # 1st row, columns 1-3

Unnamed: 0,Name,Age,Gender
0,John,35,Male


In [312]:
df.iloc[1:3, :]  # rows 2 and 3, all columns

Unnamed: 0,Name,Age,Gender
1,Sarah,28,Female
2,Zach,33,Male


In [314]:
df.iloc[:, [0, 2]]  # all rows, columns 1 and 3

Unnamed: 0,Name,Gender
0,John,Male
1,Sarah,Female
2,Zach,Male
3,Beth,Female
4,Lachlan,Male


In [316]:
# Accessing columns by name
df['Name']

0       John
1      Sarah
2       Zach
3       Beth
4    Lachlan
Name: Name, dtype: object

In [318]:
df[['Name', 'Gender']]

Unnamed: 0,Name,Gender
0,John,Male
1,Sarah,Female
2,Zach,Male
3,Beth,Female
4,Lachlan,Male


In [320]:
# Access and display columns (Another way)
df.Name

0       John
1      Sarah
2       Zach
3       Beth
4    Lachlan
Name: Name, dtype: object

In [322]:
df.loc[:, 'Age']

0    35
1    28
2    33
3    55
4    43
Name: Age, dtype: int64

In [324]:
df.iloc[:, 2]  # All rows, Third column (position 2)

0      Male
1    Female
2      Male
3    Female
4      Male
Name: Gender, dtype: category
Categories (2, object): ['Female', 'Male']

In [330]:
# Add new variables to df and display
df['Coffee_Drinker'] = Coffee_Drinker
df

Unnamed: 0,Name,Age,Gender,Coffee_Drinker
0,John,35,Male,True
1,Sarah,28,Female,True
2,Zach,33,Male,False
3,Beth,55,Female,True
4,Lachlan,43,Male,False


In [342]:
df['Diabetes'] = pd.Categorical(['Yes', 'No', 'No', 'No', 'Yes'])
df

Unnamed: 0,Name,Age,Gender,Coffee_Drinker,Diabetes
0,John,35,Male,True,Yes
1,Sarah,28,Female,True,No
2,Zach,33,Male,False,No
3,Beth,55,Female,True,No
4,Lachlan,43,Male,False,Yes


In [338]:
# Create a tibble-like DataFrame (tibble is just a modern DataFrame in R)
#tib1 = pd.DataFrame({'Name': Name, 'Age': Age, 'Gender': Gender, 'Coffee_Drinker': Coffee_Drinker})
#tib1

In [344]:
# Structure of DataFrame
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype   
---  ------          --------------  -----   
 0   Name            5 non-null      object  
 1   Age             5 non-null      int64   
 2   Gender          5 non-null      category
 3   Coffee_Drinker  5 non-null      bool    
 4   Diabetes        5 non-null      category
dtypes: bool(1), category(2), int64(1), object(1)
memory usage: 475.0+ bytes


In [356]:
# Convert between DataFrame and 'Dict'
df3 = df.to_dict(); df3 # as a nested dict: {column name: {row index: value}}

{'Name': {0: 'John', 1: 'Sarah', 2: 'Zach', 3: 'Beth', 4: 'Lachlan'},
 'Age': {0: 35, 1: 28, 2: 33, 3: 55, 4: 43},
 'Gender': {0: 'Male', 1: 'Female', 2: 'Male', 3: 'Female', 4: 'Male'},
 'Coffee_Drinker': {0: True, 1: True, 2: False, 3: True, 4: False},
 'Diabetes': {0: 'Yes', 1: 'No', 2: 'No', 3: 'No', 4: 'Yes'}}

In [358]:
df4 = pd.DataFrame(df3); df4  # as DataFrame

Unnamed: 0,Name,Age,Gender,Coffee_Drinker,Diabetes
0,John,35,Male,True,Yes
1,Sarah,28,Female,True,No
2,Zach,33,Male,False,No
3,Beth,55,Female,True,No
4,Lachlan,43,Male,False,Yes


---
#### Creating lists

In [363]:
# Lists in Python (can contain different types)
list1 = [c, Mat_A, df]
list1

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 array([[1, 4, 7],
        [2, 5, 8],
        [3, 6, 9]]),
       Name  Age  Gender  Coffee_Drinker Diabetes
 0     John   35    Male            True      Yes
 1    Sarah   28  Female            True       No
 2     Zach   33    Male           False       No
 3     Beth   55  Female            True       No
 4  Lachlan   43    Male           False      Yes]

In [365]:
# Report the Python type of each component of the list, numbering them from 1
for position, component in enumerate(list1, start=1):
    print(f'Component {position} type: {type(component)}')

Component 1 type: <class 'list'>
Component 2 type: <class 'numpy.ndarray'>
Component 3 type: <class 'pandas.core.frame.DataFrame'>


In [367]:
# Access list components
list1[0]  # vector c

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [369]:
list1[1]  # matrix Mat_A

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [371]:
list1[2]  # data frame df

Unnamed: 0,Name,Age,Gender,Coffee_Drinker,Diabetes
0,John,35,Male,True,Yes
1,Sarah,28,Female,True,No
2,Zach,33,Male,False,No
3,Beth,55,Female,True,No
4,Lachlan,43,Male,False,Yes


In [373]:
# Create a dictionary (named list) in Python
list1 = {'VecC': c, 'MatA': Mat_A, 'DatFrame': df}
list1

{'VecC': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 'MatA': array([[1, 4, 7],
        [2, 5, 8],
        [3, 6, 9]]),
 'DatFrame':       Name  Age  Gender  Coffee_Drinker Diabetes
 0     John   35    Male            True      Yes
 1    Sarah   28  Female            True       No
 2     Zach   33    Male           False       No
 3     Beth   55  Female            True       No
 4  Lachlan   43    Male           False      Yes}

In [375]:
# Examine the structure of the dictionary
for k, v in list1.items():
    print(f'{k}: {type(v)}')

VecC: <class 'list'>
MatA: <class 'numpy.ndarray'>
DatFrame: <class 'pandas.core.frame.DataFrame'>


In [377]:
# Access dictionary components
list1['VecC']  # vector component

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [379]:
list1['MatA']  # matrix component

array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

In [381]:
list1['DatFrame']  # data frame component

Unnamed: 0,Name,Age,Gender,Coffee_Drinker,Diabetes
0,John,35,Male,True,Yes
1,Sarah,28,Female,True,No
2,Zach,33,Male,False,No
3,Beth,55,Female,True,No
4,Lachlan,43,Male,False,Yes


---
### Type Conversion

Python allows implicit and explicit type conversions. However, Python typically avoids implicit coercion between non-numeric types.

#### Explicit Type Conversion

In [389]:
int("42")        # String → Integer → 42

42

In [391]:
float(True)      # Boolean → Float → 1.0

1.0

In [393]:
str(3.14)        # Float → String → "3.14"

'3.14'

In [395]:
list((1, 2))     # Tuple → List → [1, 2]

[1, 2]

---
#### Implicit Type Conversion

General Rule: bool → int → float → complex

In [403]:
True + 5      # bool→int → 6

6

In [405]:
3 + 5.0       # int→float → 8.0

8.0

In [407]:
2.5 + 3j      # float→complex → (2.5+3j)

(2.5+3j)

In [438]:
mixed1 = (1 , 'a'); mixed1 # integer and string

(1, 'a')

In [440]:
type(mixed1)

tuple

In [442]:
list_mixed2 = [True, 'a']  # logical value coerced to string in R, but not in Python
list_mixed2

[True, 'a']

In [444]:
list_mixed3 = [True, 1]  # logical value is coerced to numeric in R, in Python True==1
list_mixed3

[True, 1]

In [446]:
# A NumPy array holds a single dtype: because one element is a string, every element is coerced to a string
nparray = np.array([5, False, 4.6, 'No']); nparray

array(['5', 'False', '4.6', 'No'], dtype='<U32')