In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Pandas Series

In [6]:
my_days = ['Mon', 'Tue', 'Wed']
my_earnings = [100, 104, 102]

In [7]:
type(my_earnings)

list

In [8]:
arr_my_days = np.array(my_days)
arr_my_earnings = np.array(my_earnings)
type(arr_my_earnings)

numpy.ndarray

In [9]:
pd.Series(arr_my_earnings)

0    100
1    104
2    102
dtype: int32

In [10]:
pd.Series(my_earnings)

0    100
1    104
2    102
dtype: int64

In [12]:
series_earnings = pd.Series(data=my_earnings, index=my_days)

In [13]:
series_earnings

Mon    100
Tue    104
Wed    102
dtype: int64

In [16]:
series_earnings[1], series_earnings[0], series_earnings['Wed']

(104, 100, 102)

In [17]:
series_earnings.index

Index(['Mon', 'Tue', 'Wed'], dtype='object')

## Pandas DataFrame

#### Creation of DataFrames using Arrays or Lists

In [19]:
values = np.arange(1, 10).reshape(3, 3)
values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [20]:
pd.DataFrame(data=values)

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [21]:
index_values = ['R1', 'R2', 'R3']
col_values = ['C1', 'C2', 'C3']

In [22]:
data = pd.DataFrame(data=values, index=index_values, columns=col_values)
data

Unnamed: 0,C1,C2,C3
R1,1,2,3
R2,4,5,6
R3,7,8,9


#### Creation of DataFrame using Dictionary

In [23]:
data1 = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6]
}, index=['R1', 'R2', 'R3'])
data1

Unnamed: 0,A,B
R1,1,4
R2,2,5
R3,3,6


#### Creation of DataFrame using CSV files

In [32]:
data_csv = pd.read_csv('data/Piece_Dim.csv')
data_csv.sample(10)

Unnamed: 0,Item_No,Length,Width,Height,Operator
21,Item-22,98.06,48.39,19.79,Op-1
8,Item-9,90.62,47.29,19.78,Op-1
16,Item-17,107.69,48.18,19.33,Op-1
47,Item-48,95.61,46.3,20.31,Op-2
15,Item-16,95.51,45.36,20.52,Op-1
44,Item-45,106.53,48.55,19.2,Op-2
9,Item-10,97.22,52.14,20.71,Op-1
1,Item-2,102.5,51.42,19.63,Op-1
34,Item-35,101.86,50.45,18.05,Op-2
2,Item-3,95.37,52.25,21.51,Op-1


In [33]:
data_csv.set_index('Item_No', inplace=True)
data_csv.sample(10)

Unnamed: 0_level_0,Length,Width,Height,Operator
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Item-35,101.86,50.45,18.05,Op-2
Item-7,97.35,48.05,20.22,Op-1
Item-23,108.45,44.46,22.31,Op-1
Item-29,93.54,51.05,19.29,Op-2
Item-17,107.69,48.18,19.33,Op-1
Item-20,94.31,48.74,18.8,Op-1
Item-39,103.0,48.81,19.39,Op-2
Item-11,100.0,54.76,20.62,Op-1
Item-27,105.88,49.53,18.7,Op-2
Item-22,98.06,48.39,19.79,Op-1


In [34]:
data_csv.info()

<class 'pandas.core.frame.DataFrame'>
Index: 50 entries, Item-1 to Item-50
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Length    50 non-null     float64
 1   Width     50 non-null     float64
 2   Height    50 non-null     float64
 3   Operator  50 non-null     object 
dtypes: float64(3), object(1)
memory usage: 2.0+ KB


In [35]:
data_csv.describe()

Unnamed: 0,Length,Width,Height
count,50.0,50.0,50.0
mean,100.1288,49.4614,20.014
std,5.075593,2.615639,1.074069
min,89.82,44.46,17.37
25%,95.675,48.105,19.375
50%,99.83,49.315,19.93
75%,104.4025,50.78,20.595
max,110.54,55.76,23.11


In [37]:
data_csv['Length'].head()

Item_No
Item-1    102.67
Item-2    102.50
Item-3     95.37
Item-4     94.77
Item-5    104.26
Name: Length, dtype: float64

In [38]:
data_csv[['Length', 'Width']].head()

Unnamed: 0_level_0,Length,Width
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1
Item-1,102.67,49.53
Item-2,102.5,51.42
Item-3,95.37,52.25
Item-4,94.77,49.24
Item-5,104.26,47.9


In [40]:
data_csv['Volume'] = np.round(data_csv['Length'] * data_csv['Width'] * data_csv['Height'], 
                              decimals=2)
data_csv.head()

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-1,102.67,49.53,19.69,Op-1,100128.48
Item-2,102.5,51.42,19.63,Op-1,103460.9
Item-3,95.37,52.25,21.51,Op-1,107186.1
Item-4,94.77,49.24,18.6,Op-1,86796.43
Item-5,104.26,47.9,19.46,Op-1,97184.29


In [44]:
data_csv.drop('Volume', axis=1).sample(10)

Unnamed: 0_level_0,Length,Width,Height,Operator
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Item-35,101.86,50.45,18.05,Op-2
Item-7,97.35,48.05,20.22,Op-1
Item-31,98.38,46.79,21.27,Op-2
Item-4,94.77,49.24,18.6,Op-1
Item-46,101.21,47.61,21.39,Op-2
Item-39,103.0,48.81,19.39,Op-2
Item-13,105.72,50.04,20.06,Op-1
Item-48,95.61,46.3,20.31,Op-2
Item-20,94.31,48.74,18.8,Op-1
Item-14,89.82,45.98,20.3,Op-1


In [46]:
data_csv.value_counts(subset='Operator')

Operator
Op-1    25
Op-2    25
dtype: int64

In [48]:
data_csv.loc['Item-2']

Length         102.5
Width          51.42
Height         19.63
Operator        Op-1
Volume      103460.9
Name: Item-2, dtype: object

In [51]:
data_csv.iloc[1]

Length         102.5
Width          51.42
Height         19.63
Operator        Op-1
Volume      103460.9
Name: Item-2, dtype: object

In [49]:
data_csv.loc[['Item-2', 'Item-4']]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-2,102.5,51.42,19.63,Op-1,103460.9
Item-4,94.77,49.24,18.6,Op-1,86796.43


In [52]:
data_csv.iloc[[1, 3]]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-2,102.5,51.42,19.63,Op-1,103460.9
Item-4,94.77,49.24,18.6,Op-1,86796.43


In [50]:
data_csv.loc[['Item-2', 'Item-4'], ['Length', 'Height']]

Unnamed: 0_level_0,Length,Height
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1
Item-2,102.5,19.63
Item-4,94.77,18.6


In [53]:
data_csv.iloc[[1, 3], [0, 2]]

Unnamed: 0_level_0,Length,Height
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1
Item-2,102.5,19.63
Item-4,94.77,18.6


In [54]:
data_csv.iloc[[1, 3], :]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-2,102.5,51.42,19.63,Op-1,103460.9
Item-4,94.77,49.24,18.6,Op-1,86796.43


In [55]:
data_csv.iloc[:5, :]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-1,102.67,49.53,19.69,Op-1,100128.48
Item-2,102.5,51.42,19.63,Op-1,103460.9
Item-3,95.37,52.25,21.51,Op-1,107186.1
Item-4,94.77,49.24,18.6,Op-1,86796.43
Item-5,104.26,47.9,19.46,Op-1,97184.29


#### Mask or filtering data

In [59]:
rand_numbers = np.arange(0, 10, 1)
rand_numbers

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [60]:
# boolean mask
rand_numbers < 5

array([ True,  True,  True,  True,  True, False, False, False, False,
       False])

In [61]:
rand_numbers[rand_numbers<5]

array([0, 1, 2, 3, 4])

In [64]:
data_csv_op1 = data_csv[ data_csv['Operator'] == 'Op-1']

In [65]:
data_csv_op1.describe()

Unnamed: 0,Length,Width,Height,Volume
count,25.0,25.0,25.0,25.0
mean,99.5472,49.3012,20.0248,98268.004
std,5.147417,2.955796,0.823514,8538.478768
min,89.82,44.46,18.6,83837.45
25%,95.51,47.9,19.51,91547.16
50%,98.73,49.24,19.94,100128.48
75%,104.26,50.81,20.36,105766.95
max,108.45,55.76,22.31,112915.12


In [66]:
data_csv[data_csv['Length']>105]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-6,105.18,49.39,20.36,Op-1,105766.95
Item-13,105.72,50.04,20.06,Op-1,106121.99
Item-17,107.69,48.18,19.33,Op-1,100293.79
Item-18,106.83,50.81,19.12,Op-1,103783.98
Item-23,108.45,44.46,22.31,Op-1,107571.84
Item-27,105.88,49.53,18.7,Op-2,98067.22
Item-36,105.22,54.61,20.64,Op-2,118598.77
Item-41,110.54,51.07,21.95,Op-2,123913.85
Item-42,109.22,45.24,20.05,Op-2,99069.31
Item-44,107.05,49.01,18.97,Op-2,99526.49


In [68]:
data_csv[ (data_csv['Length']>105) & (data_csv['Operator']=='Op-1') ]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-6,105.18,49.39,20.36,Op-1,105766.95
Item-13,105.72,50.04,20.06,Op-1,106121.99
Item-17,107.69,48.18,19.33,Op-1,100293.79
Item-18,106.83,50.81,19.12,Op-1,103783.98
Item-23,108.45,44.46,22.31,Op-1,107571.84


In [69]:
data_csv[(data_csv['Length']>=95) & (data_csv['Length']<=105)]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-1,102.67,49.53,19.69,Op-1,100128.48
Item-2,102.5,51.42,19.63,Op-1,103460.9
Item-3,95.37,52.25,21.51,Op-1,107186.1
Item-5,104.26,47.9,19.46,Op-1,97184.29
Item-7,97.35,48.05,20.22,Op-1,94582.44
Item-8,99.35,44.59,21.03,Op-1,93163.25
Item-10,97.22,52.14,20.71,Op-1,104980.04
Item-11,100.0,54.76,20.62,Op-1,112915.12
Item-12,97.23,48.26,19.51,Op-1,91547.16
Item-15,99.17,53.54,20.25,Op-1,107518.63


In [70]:
data_csv[~(data_csv['Length']>=95) & (data_csv['Length']<=105)]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-4,94.77,49.24,18.6,Op-1,86796.43
Item-9,90.62,47.29,19.78,Op-1,84765.6
Item-14,89.82,45.98,20.3,Op-1,83837.45
Item-20,94.31,48.74,18.8,Op-1,86417.38
Item-29,93.54,51.05,19.29,Op-2,92113.94
Item-43,92.52,49.04,19.92,Op-2,90380.64
Item-50,93.28,48.08,19.64,Op-2,88083.48


In [71]:
data_csv[(data_csv['Length']>=110) | (data_csv['Width']<=45)]

Unnamed: 0_level_0,Length,Width,Height,Operator,Volume
Item_No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Item-8,99.35,44.59,21.03,Op-1,93163.25
Item-23,108.45,44.46,22.31,Op-1,107571.84
Item-41,110.54,51.07,21.95,Op-2,123913.85


In [None]:
my