# TASK #1: DEFINE SINGLE AND MULTI-DIMENSIONAL  NUMPY ARRAYS

In [1]:
# NumPy is a Linear Algebra Library used for multidimensional arrays
# NumPy brings the best of two worlds: (1) C/Fortran computational efficiency, (2) Python language easy syntax 

In [2]:
# Let's create a numpy array from the list "my_list"
import numpy as np
my_list = [12,21,44,56,73,22]
np.array(my_list)

array([12, 21, 44, 56, 73, 22])

In [3]:
# Multi-dimensional (Matrix definition) 
multi = [[3, 7, 9, 3],[4, 3, 2, 2]]
np.array(multi)

array([[3, 7, 9, 3],
       [4, 3, 2, 2]])

# TASK #2: LEVERAGE NUMPY BUILT-IN METHODS AND FUNCTIONS 

In [4]:
# "rand()" uniform distribution between 0 and 1
np.random.rand(2,4) # 2D
np.random.rand(2,2,4) # 3D - 2 sets of 2x4 matrices

array([[[0.85826223, 0.36997323, 0.44645292, 0.85248656],
        [0.8795004 , 0.71100185, 0.18732541, 0.57866292]],

       [[0.18459498, 0.49259478, 0.19213574, 0.22226049],
        [0.92802991, 0.40601489, 0.90055562, 0.80006083]]])

In [5]:
# you can create a matrix of random number as well,"randint" is used to generate random integers between upper and lower bounds

np.random.randint(10,50,(4,4)) #(start,stop,(shape))

array([[42, 32, 15, 29],
       [22, 39, 21, 29],
       [21, 47, 24, 35],
       [24, 31, 26, 24]])

In [6]:
# np.arange creates an evenly spaced values within a given interval
np.arange(10,34,3) # (start,stop,step)

array([10, 13, 16, 19, 22, 25, 28, 31])

In [7]:
# create a diagonal of ones and zeros everywhere else (an identity matrix which is a square matrix)
np.eye(5,dtype=int)

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1]])

In [8]:
# Matrix of ones
np.ones((4,6),dtype=int) # ((shape),dtype)

array([[1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1, 1]])

In [9]:
# Array of zeros
np.zeros(5,dtype=int)

array([0, 0, 0, 0, 0])

MINI CHALLENGE #2:
- Write a code that takes in a positive integer "x" from the user and creates a 1x10 array with random numbers ranging from 0 to "x"

In [10]:
# x = int(input("Enter a number: "))
# np.random.randint(0,x,(1,10))

# TASK #3: PERFORM MATHEMATICAL OPERATIONS IN NUMPY

In [11]:
# np.arange() returns an evenly spaced values within a given interval

In [12]:
# Add 2 numpy arrays together
n1 = np.arange(10,20)
n2 = np.arange(20,30)
n1+n2

array([30, 32, 34, 36, 38, 40, 42, 44, 46, 48])

In [13]:
n1 = np.arange(10,20)
n2 = np.arange(15,25)
n1<n2

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

MINI CHALLENGE #3:
- Given the X and Y values below, obtain the distance between them

```
X = [5, 7, 20]
Y = [9, 15, 4]
```

In [14]:
X = np.array([5, 7, 20])
Y = np.array([9, 15, 4])
np.sqrt(X**2 + Y**2)

array([10.29563014, 16.55294536, 20.39607805])

# TASK #4: PERFORM ARRAYS SLICING AND INDEXING 

In [15]:
# Access specific index from the numpy array
X[2]

20

In [16]:
# Starting from the first index 0 up until and NOT including the last element
X[0:2]

array([5, 7])

In [17]:
# Broadcasting, altering several values in a numpy array at once
X[0:2] = [9,7]

In [18]:
X

array([ 9,  7, 20])

In [19]:
# Let's define a two dimensional numpy array
matrix = np.random.randint(1,10,(4,3))
matrix

array([[6, 5, 5],
       [1, 4, 5],
       [7, 5, 3],
       [1, 2, 7]])

In [20]:
# Get a row from a mtrix
matrix[0]

array([6, 5, 5])

In [21]:
# Get one element
matrix[2][2]

3

MINI CHALLENGE #4:
- In the following matrix, replace the last row with 0

```
X = [2 30 20 -2 -4]
    [3 4  40 -3 -2]
    [-3 4 -6 90 10]
```



In [22]:
X = np.array([[2, 30, 20, -2, -4],
    [3, 4, 40, -3, -2],
    [-3, 4, -6, 90, 10]])
X[0] = 0

In [23]:
X

array([[ 0,  0,  0,  0,  0],
       [ 3,  4, 40, -3, -2],
       [-3,  4, -6, 90, 10]])

# TASK #5: PERFORM ELEMENTS SELECTION (CONDITIONAL)

MINI CHALLENGE #5:
- In the following matrix, replace negative elements by 0 and replace odd elements with -2


```
X = [2 30 20 -2 -4]
    [3 4  40 -3 -2]
    [-3 4 -6 90 10]
    [25 45 34 22 12]
    [13 24 22 32 37]
```


In [24]:
# Obtain odd elements only
X = np.array([
    [2, 30, 20, -2, -4],
    [3, 4, 40, -3, -2],
    [-3, 4, -6, 90, 10],
    [25, 45, 34, 22, 12],
    [13, 24, 22, 32, 37]
])

In [25]:
X[X < 0] = 0

In [26]:
X[X%2 !=0] = -2

In [27]:
X

array([[ 2, 30, 20,  0,  0],
       [-2,  4, 40,  0,  0],
       [ 0,  4,  0, 90, 10],
       [-2, -2, 34, 22, 12],
       [-2, 24, 22, 32, -2]])

# TASK #6: UNDERSTAND PANDAS FUNDAMENTALS

In [28]:
# Pandas is a data manipulation and analysis tool that is built on Numpy.
# Pandas uses a data structure known as DataFrame (think of it as Microsoft excel in Python). 
# DataFrames empower programmers to store and manipulate data in a tabular fashion (rows and columns).
# Series Vs. DataFrame? Series is considered a single column of a DataFrame.

In [29]:
import pandas as pd

In [30]:
pd.Series([1,2,11,33])

0     1
1     2
2    11
3    33
dtype: int64

In [31]:
# Let's define a two-dimensional Pandas DataFrame
# Note that you can create a pandas dataframe from a python dictionary
df = pd.DataFrame({'rollNo': [1,2,6],'name':['sam','rob','kenny']}) #  keys should have lists as values
df

Unnamed: 0,rollNo,name
0,1,sam
1,2,rob
2,6,kenny


In [32]:
# Let's obtain the data type 
df.dtypes

rollNo     int64
name      object
dtype: object

In [33]:
# you can only view the first couple of rows using .head()
df.head()

Unnamed: 0,rollNo,name
0,1,sam
1,2,rob
2,6,kenny


In [34]:
# you can only view the last couple of rows using .tail()
df.tail()

Unnamed: 0,rollNo,name
0,1,sam
1,2,rob
2,6,kenny


MINI CHALLENGE #6:
- A porfolio contains a collection of securities such as stocks, bonds and ETFs. Define a dataframe named 'portfolio_df' that holds 3 different stock ticker symbols, number of shares, and price per share (feel free to choose any stocks)
- Calculate the total value of the porfolio including all stocks

In [35]:
# AAPL, TSLA, AMZN
portfolio_df = pd.DataFrame({'Stock':['AAPL','TSLA','AMZN'],'Shares':[10,15,8],'Price_Per_Share':[150,700,3350]})
portfolio_df

Unnamed: 0,Stock,Shares,Price_Per_Share
0,AAPL,10,150
1,TSLA,15,700
2,AMZN,8,3350


In [36]:
s = portfolio_df['Shares'] * portfolio_df['Price_Per_Share']
s.sum()

38800

# TASK #7: PANDAS WITH CSV AND HTML DATA

In [37]:
# Pandas is used to read a csv file and store data in a DataFrame
pd.read_csv('customer.csv')

Unnamed: 0,age,gender,review,education,purchased
0,30,Female,Average,School,No
1,68,Female,Poor,UG,No
2,70,Female,Good,PG,No
3,72,Female,Good,PG,No
4,16,Female,Average,UG,No
5,31,Female,Average,School,Yes
6,18,Male,Good,School,No
7,60,Female,Poor,School,Yes
8,65,Female,Average,UG,No
9,74,Male,Good,UG,Yes


In [38]:
# Read tabular data using read_html
url = "https://en.wikipedia.org/wiki/List_of_countries_by_GDP_(nominal)" 
tables = pd.read_html(url) # scrapes all tables from the given webpage,return a list of DataFrames

In [39]:
df = tables[2]
df

Unnamed: 0_level_0,Country/Territory,IMF[1][13],IMF[1][13],World Bank[14],World Bank[14],United Nations[15],United Nations[15]
Unnamed: 0_level_1,Country/Territory,Forecast,Year,Estimate,Year,Estimate,Year
0,World,115494312,2025,105435540,2023,100834796,2022
1,United States,30337162,2025,27360935,2023,25744100,2022
2,China,19534894,[n 1]2025,17794782,[n 3]2023,17963170,[n 1]2022
3,Germany,4921563,2025,4456081,2023,4076923,2022
4,Japan,4389326,2025,4212945,2023,4232173,2022
...,...,...,...,...,...,...,...
205,Kiribati,311,2024,279,2023,223,2022
206,Palau,308,2024,263,2023,225,2022
207,Marshall Islands,305,2024,284,2023,279,2022
208,Nauru,161,2024,154,2023,147,2022


MINI CHALLENGE #7:
- Write a code that uses Pandas to read tabular US retirement data
- You can use data from here: https://www.ssa.gov/oact/progdata/nra.html 

In [40]:
df = pd.read_html('https://www.ssa.gov/oact/progdata/nra.html')
df = df[0]
df

Unnamed: 0,Year of birth,Age
0,1937 and prior,65
1,1938,65 and 2 months
2,1939,65 and 4 months
3,1940,65 and 6 months
4,1941,65 and 8 months
5,1942,65 and 10 months
6,1943-54,66
7,1955,66 and 2 months
8,1956,66 and 4 months
9,1957,66 and 6 months


# TASK #8: PANDAS OPERATIONS

In [41]:
# Pick certain rows that satisfy a certain criteria 
df['Year of birth'] = pd.to_numeric(df['Year of birth'], errors='coerce')
df['Year of birth'] > 1950 # return a boolean series
df[df['Year of birth'] > 1950]

Unnamed: 0,Year of birth,Age
7,1955.0,66 and 2 months
8,1956.0,66 and 4 months
9,1957.0,66 and 6 months
10,1958.0,66 and 8 months
11,1959.0,66 and 10 months


In [42]:
# Delete a column from a DataFrame
df.drop('Age',axis=1)

Unnamed: 0,Year of birth
0,
1,1938.0
2,1939.0
3,1940.0
4,1941.0
5,1942.0
6,
7,1955.0
8,1956.0
9,1957.0


MINI CHALLENGE #8:
- Using "bank_client_df" DataFrame, leverage pandas operations to only select high networth individuals with minimum $5000 
- What is the combined networth for all customers with 5000+ networth?

In [43]:
data = {
    'Client_ID': [101, 102, 103, 104, 105],
    'Name': ['Ayesha', 'Rob', 'Kendall', 'Billie', 'Hailey'],
    'Account_Balance ($)': [5500, 500, 12000, 3000, 800]
}
df = pd.DataFrame(data)
df[df['Account_Balance ($)']> 5000]

Unnamed: 0,Client_ID,Name,Account_Balance ($)
0,101,Ayesha,5500
2,103,Kendall,12000


In [44]:
df = df[df['Account_Balance ($)'] > 5000]
df['Account_Balance ($)'].sum()

17500

# TASK #9: PANDAS WITH FUNCTIONS

In [45]:
# Let's define a dataframe as follows:
bank_client_df = pd.DataFrame({'Bank client ID':[111, 222, 333, 444], 
                               'Bank Client Name':['Chanel', 'Steve', 'Mitch', 'Ryan'], 
                               'Net worth [$]':[3500, 29000, 10000, 2000], 
                               'Years with bank':[3, 4, 9, 5]})
bank_client_df

Unnamed: 0,Bank client ID,Bank Client Name,Net worth [$],Years with bank
0,111,Chanel,3500,3
1,222,Steve,29000,4
2,333,Mitch,10000,9
3,444,Ryan,2000,5


In [46]:
# Define a function that increases all clients networth (stocks) by a fixed value of 20%
bank_client_df['Net worth [$]'].apply(lambda x:x+(x*0.2))

0     4200.0
1    34800.0
2    12000.0
3     2400.0
Name: Net worth [$], dtype: float64

MINI CHALLENGE #9:
- Define a function that triples the stock prices and adds $200
- Apply the function to the DataFrame
- Calculate the updated total networth of all clients combined

In [47]:
bank_client_df['Net worth [$]'] = bank_client_df['Net worth [$]'].apply(lambda x: (x*3) + 200)
bank_client_df['Net worth [$]'].sum()

134300

# TASK #10: PERFORM SORTING AND ORDERING IN PANDAS

In [48]:
# Let's define a dataframe as follows:
bank_client_df = pd.DataFrame({'Bank client ID':[111, 222, 333, 444], 
                               'Bank Client Name':['Chanel', 'Steve', 'Mitch', 'Ryan'], 
                               'Net worth [$]':[3500, 29000, 10000, 2000], 
                               'Years with bank':[3, 4, 9, 5]})
bank_client_df

Unnamed: 0,Bank client ID,Bank Client Name,Net worth [$],Years with bank
0,111,Chanel,3500,3
1,222,Steve,29000,4
2,333,Mitch,10000,9
3,444,Ryan,2000,5


In [49]:
# You can sort the values in the dataframe according to number of years with bank
bank_client_df.sort_values('Years with bank',ascending=False,inplace=True)
bank_client_df

Unnamed: 0,Bank client ID,Bank Client Name,Net worth [$],Years with bank
2,333,Mitch,10000,9
3,444,Ryan,2000,5
1,222,Steve,29000,4
0,111,Chanel,3500,3


# TASK #11: PERFORM CONCATENATING AND MERGING WITH PANDAS

In [50]:
df1 = pd.DataFrame(
    {
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
        "C": ["C0", "C1", "C2", "C3"],
        "D": ["D0", "D5", "D2", "D3"],
    })


df2 = pd.DataFrame(
    {
        "A": ["A4", "A5", "A6", "A7"],
        "B": ["B4", "B5", "B6", "B7"],
        "C": ["C4", "C5", "C6", "C7"],
        "D": ["D4", "D5", "D6", "D7"],
    })

df3 = pd.DataFrame(
    {
        "A": ["A8", "A9", "A10", "A11"],
        "B": ["B8", "B4", "B10", "B7"],
        "C": ["C8", "C5", "C10", "C11"],
        "D": ["D8", "D9", "D10", "D11"],
    })
df = pd.concat([df1,df2,df3],ignore_index=True) # Vertical Concatenation (Stack Rows)
df

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D5
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7
8,A8,B8,C8,D8
9,A9,B4,C5,D9


In [51]:
s1 = pd.Series(["X0", "X1", "X2", "X3"], name="X")

result = pd.concat([df1, s1], axis=1) # horizontal concatenation when axis = 1
result

Unnamed: 0,A,B,C,D,X
0,A0,B0,C0,D0,X0
1,A1,B1,C1,D5,X1
2,A2,B2,C2,D2,X2
3,A3,B3,C3,D3,X3


In [52]:
# Use concat() when you need to stack DataFrames.
# Use merge() when you need to join DataFrames on a common column.

- Inner Join → When you need only matching records.
- Left Join → When you want all records from the first DataFrame.
- Right Join → When you want all records from the second DataFrame.
- Outer Join → When you need all records, even if they don’t match.

In [53]:
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [1, 2, 4], 'Salary': [50000, 60000, 70000]})

df1.merge(df2,on='ID',how='left')

Unnamed: 0,ID,Name,Salary
0,1,Alice,50000.0
1,2,Bob,60000.0
2,3,Charlie,


# TASK #12: PROJECT

- Define a dataframe named 'Bank_df_1' that contains the first and last names for 5 bank clients with IDs = 1, 2, 3, 4, 5 
- Assume that the bank got 5 new clients, define another dataframe named 'Bank_df_2' that contains a new clients with IDs = 6, 7, 8, 9, 10
- Let's assume we obtained additional information (Annual Salary) about all our bank customers (10 customers) 
- Concatenate both 'bank_df_1' and 'bank_df_2' dataframes
- Merge client names and their newly added salary information using the 'Bank Client ID'
- Let's assume that you became a new client to the bank
- Define a new DataFrame that contains your information such as client ID (choose 11), first name, last name, and annual salary.
- Add this new dataframe to the original dataframe 'bank_df_all'.

In [54]:
Bank_df_1 = pd.DataFrame({'IDs' : [1, 2, 3, 4, 5],'Name': ['Alice', 'Bob', 'Charlie','Kendall','Keny']})
Bank_df_2 = pd.DataFrame({'IDs' : [6, 7, 8, 9, 10],'Name': ['A', 'B', 'C','E','F']})
Bank_info = pd.DataFrame({'IDs' : [1, 2, 3, 4, 5,6,7,8,9,10],'Annual_Salary':[100000,200000,300000,44000,230000,700000,900000,10000,12000,830000]})
Bank_df = pd.concat([Bank_df_1,Bank_df_2])

In [55]:
Bank_df

Unnamed: 0,IDs,Name
0,1,Alice
1,2,Bob
2,3,Charlie
3,4,Kendall
4,5,Keny
0,6,A
1,7,B
2,8,C
3,9,E
4,10,F


In [56]:
Bank_df_all = Bank_df.merge(Bank_info,on='IDs')
Bank_df_all

Unnamed: 0,IDs,Name,Annual_Salary
0,1,Alice,100000
1,2,Bob,200000
2,3,Charlie,300000
3,4,Kendall,44000
4,5,Keny,230000
5,6,A,700000
6,7,B,900000
7,8,C,10000
8,9,E,12000
9,10,F,830000


In [57]:
new_client = {'IDs' : [11],'Name':['Ayesha'],'Annual_Salary':[550000]}
new_df = pd.DataFrame(new_client)
pd.concat([Bank_df_all,new_df]) # concatenate/add

Unnamed: 0,IDs,Name,Annual_Salary
0,1,Alice,100000
1,2,Bob,200000
2,3,Charlie,300000
3,4,Kendall,44000
4,5,Keny,230000
5,6,A,700000
6,7,B,900000
7,8,C,10000
8,9,E,12000
9,10,F,830000
