## Options

### Pandas

In [2]:
import pandas as pd

In [59]:
# Pandas display settings applied for the rest of the notebook
display_settings = {
    'display.max_columns': None,   # show every column instead of eliding with "..."
    'display.width': 100,          # characters per line before the repr wraps
    'display.max_colwidth': None,  # show full cell text, no truncation
}
for setting_name, setting_value in display_settings.items():
    pd.set_option(setting_name, setting_value)

In [4]:
# Load the `mtcars` sample dataset from the pydataset package.
# NOTE(review): this import lives outside the top imports cell; consider moving it there.
from pydataset import data
mtcars = data('mtcars')  # DataFrame indexed by car name
mtcars.head()  # preview the first 5 rows

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


## Functions in Pandas
- Understanding Data
    - type  # type of object
    - shape # rows & cols in data
    - columns # column names
    - dtypes  # datatype 
    - info()  # datatypes & non-null counts
    - sample(x) # random x rows
    - head(x) # first x rows
    - tail(x) # last x rows
- Summary Statistics
    - describe()  #summary stats for numeric cols
    - describe(include='all')  #summary stats for all cols
    - nunique() #count of unique values per column
    - value_counts()  # freq count for a series
- Missing/Duplicates
    -  isnull().sum()   # missing value count per cols
    -  notnull().sum()  # non missing value count
    -  duplicated().sum() # count of duplicate rows
- View Specific Col/ Row
    -  ['col_name']   # single column
    -  [['col1','col2']] # multiple columns
    -  iloc[0]   #1st row by index position
    -  loc['label'] # row by index label, e.g. loc['Datsun 710'] (loc[0] only works if 0 is a label)
    -  iloc[0:5]   # first 5 rows
-  Basic Data Insights
    -  corr(numeric_only=True)  #correlation matrix (numeric only)
    -  mean(numeric_only=True)   #mean of numeric cols
    -  mode().iloc[0] #most frequent values
    -  median(numeric_only=True) # median of numeric cols 

In [5]:
type(mtcars)  # confirm the loaded object is a pandas DataFrame

pandas.core.frame.DataFrame

In [6]:
mtcars.head()  # first 5 rows (head() default)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [8]:
mtcars.tail(4)  # last 4 rows

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Ford Pantera L,15.8,8,351.0,264,4.22,3.17,14.5,0,1,5,4
Ferrari Dino,19.7,6,145.0,175,3.62,2.77,15.5,0,1,5,6
Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,1,5,8
Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2


In [9]:
mtcars.sample(3)  # 3 randomly chosen rows; no random_state is set, so the rows differ on every run

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Volvo 142E,21.4,4,121.0,109,4.11,2.78,18.6,1,1,4,2
Chrysler Imperial,14.7,8,440.0,230,3.23,5.345,17.42,0,0,3,4
Merc 450SLC,15.2,8,275.8,180,3.07,3.78,18.0,0,0,3,3


In [10]:
mtcars.shape  # (number of rows, number of columns)

(32, 11)

In [11]:
mtcars.columns  # Index holding the column names

Index(['mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear',
       'carb'],
      dtype='object')

In [12]:
mtcars.info()  # index range, per-column non-null counts and dtypes, memory usage

<class 'pandas.core.frame.DataFrame'>
Index: 32 entries, Mazda RX4 to Volvo 142E
Data columns (total 11 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   mpg     32 non-null     float64
 1   cyl     32 non-null     int64  
 2   disp    32 non-null     float64
 3   hp      32 non-null     int64  
 4   drat    32 non-null     float64
 5   wt      32 non-null     float64
 6   qsec    32 non-null     float64
 7   vs      32 non-null     int64  
 8   am      32 non-null     int64  
 9   gear    32 non-null     int64  
 10  carb    32 non-null     int64  
dtypes: float64(5), int64(6)
memory usage: 3.0+ KB


In [13]:
mtcars.dtypes  # dtype of each column

mpg     float64
cyl       int64
disp    float64
hp        int64
drat    float64
wt      float64
qsec    float64
vs        int64
am        int64
gear      int64
carb      int64
dtype: object

In [14]:
## Summary

In [15]:
mtcars.describe()  # count, mean, std, min, quartiles and max for each numeric column

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
count,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0
mean,20.090625,6.1875,230.721875,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,2.8125
std,6.026948,1.785922,123.938694,68.562868,0.534679,0.978457,1.786943,0.504016,0.498991,0.737804,1.6152
min,10.4,4.0,71.1,52.0,2.76,1.513,14.5,0.0,0.0,3.0,1.0
25%,15.425,4.0,120.825,96.5,3.08,2.58125,16.8925,0.0,0.0,3.0,2.0
50%,19.2,6.0,196.3,123.0,3.695,3.325,17.71,0.0,0.0,4.0,2.0
75%,22.8,8.0,326.0,180.0,3.92,3.61,18.9,1.0,1.0,4.0,4.0
max,33.9,8.0,472.0,335.0,4.93,5.424,22.9,1.0,1.0,5.0,8.0


In [16]:
mtcars.describe(include='all')  # also summarise non-numeric columns; mtcars is all-numeric, so this matches describe()

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
count,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0,32.0
mean,20.090625,6.1875,230.721875,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,2.8125
std,6.026948,1.785922,123.938694,68.562868,0.534679,0.978457,1.786943,0.504016,0.498991,0.737804,1.6152
min,10.4,4.0,71.1,52.0,2.76,1.513,14.5,0.0,0.0,3.0,1.0
25%,15.425,4.0,120.825,96.5,3.08,2.58125,16.8925,0.0,0.0,3.0,2.0
50%,19.2,6.0,196.3,123.0,3.695,3.325,17.71,0.0,0.0,4.0,2.0
75%,22.8,8.0,326.0,180.0,3.92,3.61,18.9,1.0,1.0,4.0,4.0
max,33.9,8.0,472.0,335.0,4.93,5.424,22.9,1.0,1.0,5.0,8.0


In [17]:
mtcars.nunique()  # number of distinct values in each column

mpg     25
cyl      3
disp    27
hp      22
drat    22
wt      29
qsec    30
vs       2
am       2
gear     3
carb     6
dtype: int64

In [18]:
mtcars.value_counts()  # counts each distinct full row (all columns combined), not per-column frequencies

mpg   cyl  disp   hp   drat  wt     qsec   vs  am  gear  carb
10.4  8    460.0  215  3.00  5.424  17.82  0   0   3     4       1
           472.0  205  2.93  5.250  17.98  0   0   3     4       1
32.4  4    78.7   66   4.08  2.200  19.47  1   1   4     1       1
30.4  4    95.1   113  3.77  1.513  16.90  1   1   5     2       1
           75.7   52   4.93  1.615  18.52  1   1   4     2       1
27.3  4    79.0   66   4.08  1.935  18.90  1   1   4     1       1
26.0  4    120.3  91   4.43  2.140  16.70  0   1   5     2       1
24.4  4    146.7  62   3.69  3.190  20.00  1   0   4     2       1
22.8  4    140.8  95   3.92  3.150  22.90  1   0   4     2       1
           108.0  93   3.85  2.320  18.61  1   1   4     1       1
21.5  4    120.1  97   3.70  2.465  20.01  1   0   3     1       1
21.4  6    258.0  110  3.08  3.215  19.44  1   0   3     1       1
      4    121.0  109  4.11  2.780  18.60  1   1   4     2       1
21.0  6    160.0  110  3.90  2.875  17.02  0   1   4     4       1


In [19]:
mtcars['am'].value_counts()  # frequency of each value in the 'am' column (presumably transmission type — confirm against the dataset docs)

am
0    19
1    13
Name: count, dtype: int64

In [38]:
mtcars.nunique()  # number of distinct values in each column (same call as the earlier nunique() cell)

mpg     25
cyl      3
disp    27
hp      22
drat    22
wt      29
qsec    30
vs       2
am       2
gear     3
carb     6
dtype: int64

In [20]:
# Missing

In [21]:
mtcars.isnull().sum()  # missing-value count per column

mpg     0
cyl     0
disp    0
hp      0
drat    0
wt      0
qsec    0
vs      0
am      0
gear    0
carb    0
dtype: int64

In [22]:
mtcars.notnull().sum()  # non-missing value count per column

mpg     32
cyl     32
disp    32
hp      32
drat    32
wt      32
qsec    32
vs      32
am      32
gear    32
carb    32
dtype: int64

In [23]:
mtcars.duplicated().sum()  # number of rows that exactly repeat an earlier row

0

In [31]:
# Count rows whose (gear, am, cyl, carb) combination already appeared in an earlier row
key_cols = ['gear', 'am', 'cyl', 'carb']
mtcars.duplicated(subset=key_cols).sum()

18

In [35]:
# Show every row that shares its (gear, am, cyl, carb) combination with another row.
# keep=False flags all members of a duplicate group, not only the later repeats.
key_cols = ['gear', 'am', 'cyl', 'carb']
in_duplicate_group = mtcars.duplicated(subset=key_cols, keep=False)
mtcars[in_duplicate_group].sort_values(by=key_cols)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Valiant,18.1,6,225.0,105,2.76,3.46,20.22,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
Dodge Challenger,15.5,8,318.0,150,2.76,3.52,16.87,0,0,3,2
AMC Javelin,15.2,8,304.0,150,3.15,3.435,17.3,0,0,3,2
Pontiac Firebird,19.2,8,400.0,175,3.08,3.845,17.05,0,0,3,2
Merc 450SE,16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3
Merc 450SL,17.3,8,275.8,180,3.07,3.73,17.6,0,0,3,3
Merc 450SLC,15.2,8,275.8,180,3.07,3.78,18.0,0,0,3,3
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4


In [36]:
# Rows whose (gear, am, cyl, carb) combination occurs exactly once (non duplicates)
key_cols = ['gear', 'am', 'cyl', 'carb']
is_unique_combo = ~mtcars.duplicated(subset=key_cols, keep=False)
mtcars[is_unique_combo].sort_values(by=key_cols)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Toyota Corona,21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1
Ferrari Dino,19.7,6,145.0,175,3.62,2.77,15.5,0,1,5,6
Ford Pantera L,15.8,8,351.0,264,4.22,3.17,14.5,0,1,5,4
Maserati Bora,15.0,8,301.0,335,3.54,3.57,14.6,0,1,5,8


In [64]:
# Remove duplicates by selected columns: keep the first row of each (gear, am, cyl, carb) combination.
# The default (not in place) returns a new DataFrame, so mtcars itself is left unchanged.
key_cols = ['gear', 'am', 'cyl', 'carb']
mtcars.drop_duplicates(subset=key_cols)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2
Duster 360,14.3,8,360.0,245,3.21,3.57,15.84,0,0,3,4
Merc 240D,24.4,4,146.7,62,3.69,3.19,20.0,1,0,4,2
Merc 280,19.2,6,167.6,123,3.92,3.44,18.3,1,0,4,4
Merc 450SE,16.4,8,275.8,180,3.07,4.07,17.4,0,0,3,3
Honda Civic,30.4,4,75.7,52,4.93,1.615,18.52,1,1,4,2
Toyota Corona,21.5,4,120.1,97,3.7,2.465,20.01,1,0,3,1


In [39]:
### View rows/ columns

In [41]:
mtcars['mpg'].head()  # single column as a Series, first 5 values

Mazda RX4            21.0
Mazda RX4 Wag        21.0
Datsun 710           22.8
Hornet 4 Drive       21.4
Hornet Sportabout    18.7
Name: mpg, dtype: float64

In [None]:
mtcars[['wt','mpg']].tail()  # two columns selected with a list of names, last 5 rows

In [46]:
mtcars.iloc[0]  # 1st row by integer position, returned as a Series

mpg      21.00
cyl       6.00
disp    160.00
hp      110.00
drat      3.90
wt        2.62
qsec     16.46
vs        0.00
am        1.00
gear      4.00
carb      4.00
Name: Mazda RX4, dtype: float64

In [45]:
mtcars.iloc[2:5]  # 3rd to 5th row by position (end position 5 is excluded), all columns

Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [48]:
mtcars.loc['Datsun 710']  # row selected by its index label (the car name)

mpg      22.80
cyl       4.00
disp    108.00
hp       93.00
drat      3.85
wt        2.32
qsec     18.61
vs        1.00
am        1.00
gear      4.00
carb      1.00
Name: Datsun 710, dtype: float64

In [52]:
mtcars.loc[:, 'mpg':'wt'].head()  # all rows, columns mpg through wt by name (label slices include the end)

Unnamed: 0,mpg,cyl,disp,hp,drat,wt
Mazda RX4,21.0,6,160.0,110,3.9,2.62
Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875
Datsun 710,22.8,4,108.0,93,3.85,2.32
Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215
Hornet Sportabout,18.7,8,360.0,175,3.15,3.44


In [54]:
mtcars.iloc[:, 3:8].head()  # all rows, columns at positions 3-7 (hp through vs); end position 8 is excluded

Unnamed: 0,hp,drat,wt,qsec,vs
Mazda RX4,110,3.9,2.62,16.46,0
Mazda RX4 Wag,110,3.9,2.875,17.02,0
Datsun 710,93,3.85,2.32,18.61,1
Hornet 4 Drive,110,3.08,3.215,19.44,1
Hornet Sportabout,175,3.15,3.44,17.02,0


In [56]:
mtcars.iloc[:, [3,6,9,10]].head()  # 4th, 7th, 10th and 11th columns by position, first 5 rows

Unnamed: 0,hp,qsec,gear,carb
Mazda RX4,110,16.46,4,4
Mazda RX4 Wag,110,17.02,4,4
Datsun 710,93,18.61,4,1
Hornet 4 Drive,110,19.44,3,1
Hornet Sportabout,175,17.02,3,2


In [62]:
# 1st, 5th and 32nd rows with the 4th, 7th, 10th and 11th columns (all by 0-based position)
row_positions = [0, 4, 31]
col_positions = [3, 6, 9, 10]
mtcars.iloc[row_positions, col_positions]

Unnamed: 0,hp,qsec,gear,carb
Mazda RX4,110,16.46,4,4
Hornet Sportabout,175,17.02,3,2
Volvo 142E,109,18.6,4,2


In [60]:
mtcars.columns  # column names (unchanged from the earlier listing)

Index(['mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb'], dtype='object')

## END HERE
- Go To Advanced