# Pandas

## Series

In [167]:
import pandas as pd
import numpy as np

In [168]:
products = ['A','B','C','D']

In [169]:
products

['A', 'B', 'C', 'D']

In [170]:
type(products)

list

In [171]:
product_categories = pd.Series(products) # making a series

In [172]:
product_categories

0    A
1    B
2    C
3    D
dtype: object

In [173]:
type(product_categories)

pandas.core.series.Series

In [174]:
start_dates_deposits = pd.Series({
    '7/4/2024':2000,
    '1/2/2023':3000,
    '10/4/2023':5000
})

In [175]:
# As shown above, a Series can also be built from a dictionary: the keys become the index and the values become the data.

In [176]:
start_dates_deposits

7/4/2024     2000
1/2/2023     3000
10/4/2023    5000
dtype: int64

In [177]:
start_dates_deposits.sum()

10000

In [178]:
# sum() adds up only the values; the dates are not summed because they are the index

In [179]:
start_dates_deposits.min()

2000

In [180]:
start_dates_deposits.max()

5000

In [181]:
start_dates_deposits.idxmin()

'7/4/2024'

In [182]:
start_dates_deposits.idxmax()

'10/4/2023'

In [183]:
start_dates_deposits.head() # returns the first 5 rows by default, index included (this Series has only 3 rows, so all are shown)

7/4/2024     2000
1/2/2023     3000
10/4/2023    5000
dtype: int64

In [184]:
start_dates_deposits.head(1) # will return only 1st row

7/4/2024    2000
dtype: int64

In [185]:
start_dates_deposits.tail() # returns the last 5 rows by default (this Series has only 3 rows, so all are shown)

7/4/2024     2000
1/2/2023     3000
10/4/2023    5000
dtype: int64

In [186]:
start_dates_deposits.tail(1)

10/4/2023    5000
dtype: int64

## Dataframes

In [187]:
array_a = np.array([[3,2,1],[6,3,2]])

In [188]:
array_a

array([[3, 2, 1],
       [6, 3, 2]])

In [189]:
pd.DataFrame(array_a)

Unnamed: 0,0,1,2
0,3,2,1
1,6,3,2


In [190]:
type(pd.DataFrame(array_a))

pandas.core.frame.DataFrame

In [191]:
df = pd.DataFrame(array_a,columns = ['Column 1','Column 2','Columm 3'])

In [192]:
df

Unnamed: 0,Column 1,Column 2,Columm 3
0,3,2,1
1,6,3,2


In [193]:
df = pd.DataFrame(array_a,columns = ['Column 1','Column 2','Columm 3'], index= ['row 1','row 2'])

In [194]:
df

Unnamed: 0,Column 1,Column 2,Columm 3
row 1,3,2,1
row 2,6,3,2


In [195]:
data = pd.read_csv('Lending-company.csv', index_col = 'LoanID')

In [196]:
# Work on an independent copy so the originally loaded `data` stays untouched;
# plain assignment would only bind a second name to the same DataFrame.
lending_co_data = data.copy()

In [197]:
lending_co_data.head()

Unnamed: 0_level_0,StringID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
LoanID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,LoanID_1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
2,LoanID_2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
3,LoanID_3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
4,LoanID_4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
5,LoanID_5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [198]:
# NaN marks a missing (null) value

In [199]:
lending_co_data.index

Index([   1,    2,    3,    4,    5,    6,    7,    8,    9,   10,
       ...
       1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043],
      dtype='int64', name='LoanID', length=1043)

In [200]:
lending_co_data.columns

Index(['StringID', 'Product', 'CustomerGender', 'Location', 'Region',
       'TotalPrice', 'StartDate', 'Deposit', 'DailyRate', 'TotalDaysYr',
       'AmtPaid36', 'AmtPaid60', 'AmtPaid360', 'LoanStatus'],
      dtype='object')

In [201]:
type(lending_co_data)

pandas.core.frame.DataFrame

In [202]:
type(lending_co_data.columns)

pandas.core.indexes.base.Index

In [203]:
type(lending_co_data.dtypes)

pandas.core.series.Series

In [204]:
lending_co_data.dtypes

StringID           object
Product            object
CustomerGender     object
Location           object
Region             object
TotalPrice        float64
StartDate          object
Deposit             int64
DailyRate           int64
TotalDaysYr         int64
AmtPaid36           int64
AmtPaid60           int64
AmtPaid360          int64
LoanStatus         object
dtype: object

In [205]:
lending_co_data.values # converts df to numpy array

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [206]:
lending_co_data.values.shape

(1043, 14)

In [207]:
lending_co_data.to_numpy() # does the same thing as .values; the pandas documentation recommends to_numpy()

array([['LoanID_1', 'Product B', 'Female', ..., 4166, 14621, 'Active'],
       ['LoanID_2', 'Product D', 'Female', ..., 4096, 16041, 'Active'],
       ['LoanID_3', 'Product B', 'Male', ..., 3205, 16340, nan],
       ...,
       ['LoanID_1041', 'Product A', 'NotSpecified', ..., 5143, 16617,
        'Finished Payment'],
       ['LoanID_1042', 'Product B', 'Female', ..., 3462, 15617,
        'Finished Payment'],
       ['LoanID_1043', 'Product A', 'NotSpecified', ..., 4743, 16617,
        'Finished Payment']], dtype=object)

In [208]:
ls

 Volume in drive C has no label.
 Volume Serial Number is 42D7-CC54

 Directory of C:\Users\abuba\Downloads\innovista\day 2

12/24/2024  07:53 PM    <DIR>          .
12/22/2024  06:00 PM    <DIR>          ..
12/24/2024  06:38 PM    <DIR>          .ipynb_checkpoints
12/22/2024  06:01 PM         1,933,919 Introduction To Python Course.pptx
12/22/2024  06:01 PM           115,131 Lending-company.csv
12/24/2024  07:20 PM            35,265 numpy.ipynb
12/22/2024  06:02 PM         1,191,835 numpy.pptx
12/22/2024  06:01 PM           126,103 NumpyPandas.ipynb
12/24/2024  07:53 PM            28,190 pandas.ipynb
12/22/2024  06:02 PM           352,110 pandas.pptx
12/22/2024  06:02 PM            34,681 Python.ipynb
12/22/2024  06:02 PM           155,937 Sales-products.csv
               9 File(s)      3,973,171 bytes
               3 Dir(s)  101,706,502,144 bytes free


In [209]:
len(lending_co_data.columns)

14

In [213]:
data2 = pd.read_csv('Lending-company.csv', index_col = 'StringID')

In [214]:
data2.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [224]:
data2.Product # attribute access returns the Product column, but it is not recommended: a column whose name matches a DataFrame method or attribute cannot be reached this way

StringID
LoanID_1       Product B
LoanID_2       Product D
LoanID_3       Product B
LoanID_4       Product A
LoanID_5       Product B
                 ...    
LoanID_1039    Product B
LoanID_1040    Product A
LoanID_1041    Product A
LoanID_1042    Product B
LoanID_1043    Product A
Name: Product, Length: 1043, dtype: object

In [216]:
# better way:

In [223]:
data2['Product']

StringID
LoanID_1       Product B
LoanID_2       Product D
LoanID_3       Product B
LoanID_4       Product A
LoanID_5       Product B
                 ...    
LoanID_1039    Product B
LoanID_1040    Product A
LoanID_1041    Product A
LoanID_1042    Product B
LoanID_1043    Product A
Name: Product, Length: 1043, dtype: object

In [222]:
data2['Location']

StringID
LoanID_1        Location 3
LoanID_2        Location 6
LoanID_3        Location 8
LoanID_4       Location 26
LoanID_5       Location 34
                  ...     
LoanID_1039    Location 73
LoanID_1040    Location 82
LoanID_1041    Location 11
LoanID_1042    Location 26
LoanID_1043    Location 94
Name: Location, Length: 1043, dtype: object

In [219]:
# how to get more than 1 column

In [221]:
data2[['Location', 'Product']] # selecting several columns requires passing a list of names, hence the double brackets

Unnamed: 0_level_0,Location,Product
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1
LoanID_1,Location 3,Product B
LoanID_2,Location 6,Product D
LoanID_3,Location 8,Product B
LoanID_4,Location 26,Product A
LoanID_5,Location 34,Product B
...,...,...
LoanID_1039,Location 73,Product B
LoanID_1040,Location 82,Product A
LoanID_1041,Location 11,Product A
LoanID_1042,Location 26,Product B


In [226]:
data2[['Location', 'Product']].head()

Unnamed: 0_level_0,Location,Product
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1
LoanID_1,Location 3,Product B
LoanID_2,Location 6,Product D
LoanID_3,Location 8,Product B
LoanID_4,Location 26,Product A
LoanID_5,Location 34,Product B


In [227]:
# iloc and loc

In [232]:
data2.iloc[1] # iloc selects by integer position (use it when you know the row number rather than its label); position 1 is the second row

LoanID                     2
Product            Product D
CustomerGender        Female
Location          Location 6
Region              Region 6
TotalPrice               NaN
StartDate         02/01/2019
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3161
AmtPaid60               4096
AmtPaid360             16041
LoanStatus            Active
Name: LoanID_2, dtype: object

In [233]:
data2.iloc[0] # row 0

LoanID                     1
Product            Product B
CustomerGender        Female
Location          Location 3
Region              Region 2
TotalPrice           17600.0
StartDate         04/07/2018
Deposit                 2200
DailyRate                 45
TotalDaysYr              365
AmtPaid36               3221
AmtPaid60               4166
AmtPaid360             14621
LoanStatus            Active
Name: LoanID_1, dtype: object

In [234]:
data2.iloc[1,3]

'Location 6'

In [238]:
data2.iloc[:,3] # all rows of the column at zero-based position 3, i.e. 'Location'

StringID
LoanID_1        Location 3
LoanID_2        Location 6
LoanID_3        Location 8
LoanID_4       Location 26
LoanID_5       Location 34
                  ...     
LoanID_1039    Location 73
LoanID_1040    Location 82
LoanID_1041    Location 11
LoanID_1042    Location 26
LoanID_1043    Location 94
Name: Location, Length: 1043, dtype: object

In [242]:
data2.head()

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_1,1,Product B,Female,Location 3,Region 2,17600.0,04/07/2018,2200,45,365,3221,4166,14621,Active
LoanID_2,2,Product D,Female,Location 6,Region 6,,02/01/2019,2200,45,365,3161,4096,16041,Active
LoanID_3,3,Product B,Male,Location 8,Region 3,16600.0,08/12/2016,1000,45,365,2260,3205,16340,
LoanID_4,4,Product A,Male,Location 26,Region 2,17600.0,,2200,45,365,3141,4166,16321,Active
LoanID_5,5,Product B,Female,Location 34,Region 3,21250.0,28/10/2017,2200,55,365,3570,4745,14720,Active


In [244]:
data2.loc['LoanID_3'] # loc selects by index label; 'LoanID_3' is the third row here

LoanID                     3
Product            Product B
CustomerGender          Male
Location          Location 8
Region              Region 3
TotalPrice           16600.0
StartDate         08/12/2016
Deposit                 1000
DailyRate                 45
TotalDaysYr              365
AmtPaid36               2260
AmtPaid60               3205
AmtPaid360             16340
LoanStatus               NaN
Name: LoanID_3, dtype: object

In [246]:
data2.loc[:,'Product'] # all rows of the 'Product' column, selected by label

StringID
LoanID_1       Product B
LoanID_2       Product D
LoanID_3       Product B
LoanID_4       Product A
LoanID_5       Product B
                 ...    
LoanID_1039    Product B
LoanID_1040    Product A
LoanID_1041    Product A
LoanID_1042    Product B
LoanID_1043    Product A
Name: Product, Length: 1043, dtype: object

In [247]:
data2.iloc[5:10]

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active
LoanID_7,7,Product A,Male,Location 25,,21250.0,04/07/2020,2200,55,365,1951,3176,18701,Active
LoanID_8,8,Product D,Male,Location 46,Region 5,17600.0,24/04/2018,2200,45,365,4071,4056,16351,Active
LoanID_9,9,Product A,Male,Location 156,Region 6,23250.0,03/09/2019,5000,55,365,5850,7375,21250,
LoanID_10,10,Product C,Male,Location 21,Region 9,21250.0,25/07/2020,2200,55,365,2051,3176,18351,Active


In [250]:
data2.loc['LoanID_6':'LoanID_10']

Unnamed: 0_level_0,LoanID,Product,CustomerGender,Location,Region,TotalPrice,StartDate,Deposit,DailyRate,TotalDaysYr,AmtPaid36,AmtPaid60,AmtPaid360,LoanStatus
StringID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
LoanID_6,6,Product A,Male,Location 34,Region 1,,19/04/2019,2200,45,365,3301,4066,15141,Active
LoanID_7,7,Product A,Male,Location 25,,21250.0,04/07/2020,2200,55,365,1951,3176,18701,Active
LoanID_8,8,Product D,Male,Location 46,Region 5,17600.0,24/04/2018,2200,45,365,4071,4056,16351,Active
LoanID_9,9,Product A,Male,Location 156,Region 6,23250.0,03/09/2019,5000,55,365,5850,7375,21250,
LoanID_10,10,Product C,Male,Location 21,Region 9,21250.0,25/07/2020,2200,55,365,2051,3176,18351,Active


In [252]:
data2.loc['LoanID_8', 'TotalPrice']

17600.0

In [253]:
np.nan  # canonical spelling of NaN; the np.NaN alias was removed in NumPy 2.0

nan

In [254]:
array_a

array([[3, 2, 1],
       [6, 3, 2]])

In [258]:
arr = array_a.flatten()

In [259]:
arr

array([3, 2, 1, 6, 3, 2])

In [260]:
for i in arr:
    print(i)

3
2
1
6
3
2


In [261]:
# Walk the flattened array, printing each position next to its value.
for position, value in enumerate(arr):
    print(position, value)

0 3
1 2
2 1
3 6
4 3
5 2


In [10]:
# A plain Python list of strings (despite the name, not a NumPy array).
array_b = ['product', 'loan', '4000']

# Print every item together with its zero-based position.
for position, item in enumerate(array_b):
    print(position, item)

0 product
1 loan
2 4000


In [16]:
# Create (or overwrite) file1.txt with the items of array_b.
# writelines() writes each string as-is and adds no line separators,
# so the three items end up concatenated in the file.
with open ('file1.txt', 'w') as f1:
        f1.writelines(array_b)

In [17]:
# Append every item of array_b to file1.txt on its own line.
# writelines() expects an iterable of strings and adds no separators itself,
# so the newline is attached to each item explicitly.
with open('file1.txt', 'a') as f1:
    f1.writelines(item + '\n' for item in array_b)

In [19]:
with open('file1.txt', 'r') as f1:
    contents = f1.read()

In [20]:
contents

'productloan4000product\nloan\n4000\n'