# Pandas Lesson

## DataFrame

In [1]:
import pandas as pd

In [11]:
# Each dict is one row and its keys become column names.
# The first row has no 'd' key, so that cell is left empty (NaN).
data_lst = [
    dict(a=1, b=2, c=3),
    dict(a=4, b=5, c=6, d=7),
]
df = pd.DataFrame(data=data_lst)
df

Unnamed: 0,a,b,c,d
0,1,2,3,
1,4,5,6,7.0


In [12]:
# Three rows that share no keys: the columns are the union of all keys,
# and every cell without a value is left empty (NaN).
data_lst = [
    dict(a=1),
    dict(b=2),
    dict(c=3),
]
df = pd.DataFrame(data=data_lst)
df

Unnamed: 0,a,b,c
0,1.0,,
1,,2.0,
2,,,3.0


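**NaN** stands for *Not a Number*. pandas uses it to mark missing values, which is why the columns that contain it are displayed as floats (e.g. `7.0`, `1.0`).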

The code below will **fail** because fewer column names are defined (3) than there are values in the longest row (4)

In [8]:
# The second row holds 4 values but only 3 column names are supplied,
# so the DataFrame constructor raises (this error is the point of the cell).
data_vals = [[1, 2, 3], [1, 2, 3, 4]]
data_cols = ['a', 'b', 'c']
df = pd.DataFrame(data=data_vals, columns=data_cols)
df

AssertionError: 3 columns passed, passed data had 4 columns

The code below will **work** because there are as many column names defined (4) as values in the longest row; the shorter row is padded with *NaN*

In [13]:
data_vals = [[1, 2, 3], [400, 500, 600, 700]]
data_cols = ['a', 'b', 'c', 'd']
df = pd.DataFrame(data=data_vals, columns=data_cols)
df

Unnamed: 0,a,b,c,d
0,1,2,3,
1,400,500,600,700.0


## Read a File

In [23]:
df = pd.read_csv('abc.csv')
df

Unnamed: 0,a,b,c
0,23,56,300
1,30,100,1010
2,80,21,2


In [24]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
a    3 non-null int64
b    3 non-null int64
c    3 non-null int64
dtypes: int64(3)
memory usage: 152.0 bytes


In [26]:
df = pd.read_csv('abc.csv', header=None) #header=None: the first line of the file is read as a data row and the columns are numbered 0, 1, 2

In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
0    4 non-null object
1    4 non-null object
2    4 non-null object
dtypes: object(3)
memory usage: 176.0+ bytes


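The data type above changed from *int64* to *object* because `header=None` makes pandas read the header line (`a,b,c`) as a data row, so every column now mixes text and numbers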

In [28]:
df.shape #(rows, columns); `shape` is an attribute, so df.shape() raises "TypeError: 'tuple' object is not callable"

(4, 3)

In [29]:
df.head() #shows the first n rows of the DataFrame (n defaults to 5; this frame only has 4 rows)

Unnamed: 0,0,1,2
0,a,b,c
1,23,56,300
2,30,100,1010
3,80,21,2


In [30]:
df.tail() #shows the last n rows of the DataFrame (n defaults to 5; this frame only has 4 rows)

Unnamed: 0,0,1,2
0,a,b,c
1,23,56,300
2,30,100,1010
3,80,21,2


In [31]:
df.describe()

Unnamed: 0,0,1,2
count,4,4,4
unique,4,4,4
top,30,56,1010
freq,1,1,1


In [33]:
df = pd.read_csv('abc.csv')

In [34]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
a    3 non-null int64
b    3 non-null int64
c    3 non-null int64
dtypes: int64(3)
memory usage: 152.0 bytes


In [35]:
df.columns

Index(['a', 'b', 'c'], dtype='object')

### Force column type upon file read

In [38]:
import numpy as np
# dtype maps column name -> type: only 'a' is forced to float32, 'b' and 'c' are still inferred.
abc_df = pd.read_csv('abc.csv', dtype={'a': np.float32})
abc_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
a    3 non-null float32
b    3 non-null int64
c    3 non-null int64
dtypes: float32(1), int64(2)
memory usage: 140.0 bytes


### Slicing data

In [43]:
wine_df = pd.read_csv('winequality-red.csv', delimiter=';') #fields are split on ';' instead of the default ','
wine_df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
5,7.4,0.660,0.00,1.8,0.075,13.0,40.0,0.99780,3.51,0.56,9.4,5
6,7.9,0.600,0.06,1.6,0.069,15.0,59.0,0.99640,3.30,0.46,9.4,5
7,7.3,0.650,0.00,1.2,0.065,15.0,21.0,0.99460,3.39,0.47,10.0,7
8,7.8,0.580,0.02,2.0,0.073,9.0,18.0,0.99680,3.36,0.57,9.5,7
9,7.5,0.500,0.36,6.1,0.071,17.0,102.0,0.99780,3.35,0.80,10.5,5


In [45]:
wine_df[0:5] #top 5 rows

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [54]:
wine_df[4::-1] #rows 4 down to 0, i.e. the first 5 rows in reverse order -- NOT the bottom 5 rows (use wine_df[-5:] or wine_df.tail() for those)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


### Accessing columns

In [57]:
wine_df.pH[:5]

0    3.51
1    3.20
2    3.26
3    3.16
4    3.51
Name: pH, dtype: float64

In [62]:
wine_df['fixed acidity'][:5] #if name has spaces, use ['']

0     7.4
1     7.8
2     7.8
3    11.2
4     7.4
Name: fixed acidity, dtype: float64

In [67]:
# Small frame with string row labels, used below to demonstrate label-based .loc access.
dummy_df = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6]],
    index=['foo', 'bar'],
    columns=list('abc'),
)
dummy_df

Unnamed: 0,a,b,c
foo,1,2,3
bar,4,5,6


In [70]:
dummy_df.loc['foo']

a    1
b    2
c    3
Name: foo, dtype: int64

In [74]:
dummy_df.loc['foo',['b','c']]

b    2
c    3
Name: foo, dtype: int64

In [75]:
wine_df.index[2:4]

RangeIndex(start=2, stop=4, step=1)

In [79]:
wine_df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [78]:
wine_df.loc[wine_df.index[2:4], ['chlorides', 'pH']] #rows: the index labels found at positions 2 and 3; columns: selected by name

Unnamed: 0,chlorides,pH
2,0.092,3.26
3,0.075,3.16


In [82]:
dummy_df.loc[['foo', 'bar'], dummy_df.columns[[0,2]]] #rows by label; columns looked up by position (0 and 2 -> 'a' and 'c')

Unnamed: 0,a,c
foo,1,3
bar,4,6


### Querying data

In [89]:
(wine_df['chlorides'] >= 0.08)

0       False
1        True
2        True
3       False
4       False
5       False
6       False
7       False
8       False
9       False
10       True
11      False
12       True
13       True
14       True
15       True
16       True
17       True
18       True
19       True
20      False
21       True
22       True
23       True
24       True
25       True
26       True
27       True
28       True
29       True
        ...  
1569    False
1570     True
1571    False
1572    False
1573    False
1574    False
1575    False
1576     True
1577    False
1578     True
1579    False
1580    False
1581    False
1582    False
1583    False
1584    False
1585    False
1586    False
1587    False
1588    False
1589    False
1590    False
1591     True
1592    False
1593    False
1594     True
1595    False
1596    False
1597    False
1598    False
Name: chlorides, Length: 1599, dtype: bool

In [86]:
# Combine boolean masks with & (element-wise AND). Each comparison needs its own
# parentheses because & binds more tightly than >= and <.
wine_df[(wine_df['chlorides'] >= 0.04) & (wine_df['chlorides'] < 0.045)]

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
802,5.1,0.585,0.0,1.7,0.044,14.0,86.0,0.99264,3.56,0.94,12.9,7
806,8.4,0.25,0.39,2.0,0.041,4.0,10.0,0.99386,3.27,0.71,12.5,7
1142,6.9,0.45,0.11,2.4,0.043,6.0,12.0,0.99354,3.3,0.65,11.4,6
1151,6.1,0.58,0.23,2.5,0.044,16.0,70.0,0.99352,3.46,0.65,12.5,6
1157,5.1,0.51,0.18,2.1,0.042,16.0,101.0,0.9924,3.46,0.87,12.9,7
1178,5.6,0.915,0.0,2.1,0.041,17.0,78.0,0.99346,3.68,0.73,11.4,5
1219,9.0,0.39,0.4,1.3,0.044,25.0,50.0,0.99478,3.2,0.83,10.9,6
1228,5.1,0.42,0.0,1.8,0.044,18.0,88.0,0.99157,3.68,0.73,13.6,7
1269,5.5,0.49,0.03,1.8,0.044,28.0,87.0,0.9908,3.5,0.82,14.0,8
1316,5.4,0.74,0.0,1.2,0.041,16.0,46.0,0.99258,4.01,0.59,12.5,6


In [90]:
wine_df.query('chlorides >= 0.04 & chlorides < 0.045') #same result as above

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
802,5.1,0.585,0.0,1.7,0.044,14.0,86.0,0.99264,3.56,0.94,12.9,7
806,8.4,0.25,0.39,2.0,0.041,4.0,10.0,0.99386,3.27,0.71,12.5,7
1142,6.9,0.45,0.11,2.4,0.043,6.0,12.0,0.99354,3.3,0.65,11.4,6
1151,6.1,0.58,0.23,2.5,0.044,16.0,70.0,0.99352,3.46,0.65,12.5,6
1157,5.1,0.51,0.18,2.1,0.042,16.0,101.0,0.9924,3.46,0.87,12.9,7
1178,5.6,0.915,0.0,2.1,0.041,17.0,78.0,0.99346,3.68,0.73,11.4,5
1219,9.0,0.39,0.4,1.3,0.044,25.0,50.0,0.99478,3.2,0.83,10.9,6
1228,5.1,0.42,0.0,1.8,0.044,18.0,88.0,0.99157,3.68,0.73,13.6,7
1269,5.5,0.49,0.03,1.8,0.044,28.0,87.0,0.9908,3.5,0.82,14.0,8
1316,5.4,0.74,0.0,1.2,0.041,16.0,46.0,0.99258,4.01,0.59,12.5,6


### Group By

In [97]:
wine_df['quality'].unique()

array([5, 6, 7, 4, 8, 3], dtype=int64)

In [93]:
group = wine_df.groupby('quality')

In [96]:
list(group) #iterating a groupby yields (quality value, sub-DataFrame) pairs; the full output is long

[(3,
        fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
  459            11.6             0.580         0.66            2.20      0.074   
  517            10.4             0.610         0.49            2.10      0.200   
  690             7.4             1.185         0.00            4.25      0.097   
  832            10.4             0.440         0.42            1.50      0.145   
  899             8.3             1.020         0.02            3.40      0.084   
  1299            7.6             1.580         0.00            2.10      0.137   
  1374            6.8             0.815         0.00            1.20      0.267   
  1469            7.3             0.980         0.05            2.10      0.061   
  1478            7.1             0.875         0.05            5.70      0.082   
  1505            6.7             0.760         0.02            1.80      0.078   
  
        free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \


In [98]:
group.max()

Unnamed: 0_level_0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
quality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3,11.6,1.58,0.66,5.7,0.267,34.0,49.0,1.0008,3.63,0.86,11.0
4,12.5,1.13,1.0,12.9,0.61,41.0,119.0,1.001,3.9,2.0,13.1
5,15.9,1.33,0.79,15.5,0.611,68.0,155.0,1.00315,3.74,1.98,14.9
6,14.3,1.04,0.78,15.4,0.415,72.0,165.0,1.00369,4.01,1.95,14.0
7,15.6,0.915,0.76,8.9,0.358,54.0,289.0,1.0032,3.78,1.36,14.0
8,12.6,0.85,0.72,6.4,0.086,42.0,88.0,0.9988,3.72,1.1,14.0


In [99]:
group.mean()

Unnamed: 0_level_0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
quality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3,8.36,0.8845,0.171,2.635,0.1225,11.0,24.9,0.997464,3.398,0.57,9.955
4,7.779245,0.693962,0.174151,2.69434,0.090679,12.264151,36.245283,0.996542,3.381509,0.596415,10.265094
5,8.167254,0.577041,0.243686,2.528855,0.092736,16.983847,56.51395,0.997104,3.304949,0.620969,9.899706
6,8.347179,0.497484,0.273824,2.477194,0.084956,15.711599,40.869906,0.996615,3.318072,0.675329,10.629519
7,8.872362,0.40392,0.375176,2.720603,0.076588,14.045226,35.020101,0.996104,3.290754,0.741256,11.465913
8,8.566667,0.423333,0.391111,2.577778,0.068444,13.277778,33.444444,0.995212,3.267222,0.767778,12.094444


In [101]:
wine_df.query('pH == 2.89 & quality == 5')

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
650,10.7,0.43,0.39,2.2,0.106,8.0,32.0,0.9986,2.89,0.5,9.6,5
656,10.7,0.43,0.39,2.2,0.106,8.0,32.0,0.9986,2.89,0.5,9.6,5


In [102]:
wine_df.sort_values('quality', ascending=False)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
495,10.7,0.350,0.53,2.60,0.070,5.0,16.0,0.99720,3.15,0.65,11.00,8
1403,7.2,0.330,0.33,1.70,0.061,3.0,13.0,0.99600,3.23,1.10,10.00,8
390,5.6,0.850,0.05,1.40,0.045,12.0,88.0,0.99240,3.56,0.82,12.90,8
1061,9.1,0.400,0.50,1.80,0.071,7.0,16.0,0.99462,3.21,0.69,12.50,8
1202,8.6,0.420,0.39,1.80,0.068,6.0,12.0,0.99516,3.35,0.69,11.70,8
828,7.8,0.570,0.09,2.30,0.065,34.0,45.0,0.99417,3.46,0.74,12.70,8
481,9.4,0.300,0.56,2.80,0.080,6.0,17.0,0.99640,3.15,0.92,11.70,8
455,11.3,0.620,0.67,5.20,0.086,6.0,19.0,0.99880,3.22,0.69,13.40,8
1449,7.2,0.380,0.31,2.00,0.056,15.0,29.0,0.99472,3.23,0.76,11.30,8
440,12.6,0.310,0.72,2.20,0.072,6.0,29.0,0.99870,2.88,0.82,9.80,8


In [104]:
wine_df.sort_values(['quality', 'alcohol'], ascending=[True, False])

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
899,8.3,1.020,0.02,3.40,0.084,6.0,11.0,0.99892,3.48,0.49,11.00,3
1299,7.6,1.580,0.00,2.10,0.137,5.0,9.0,0.99476,3.50,0.40,10.90,3
690,7.4,1.185,0.00,4.25,0.097,5.0,14.0,0.99660,3.63,0.54,10.70,3
1478,7.1,0.875,0.05,5.70,0.082,3.0,14.0,0.99808,3.40,0.52,10.20,3
1505,6.7,0.760,0.02,1.80,0.078,6.0,12.0,0.99600,3.55,0.63,9.95,3
832,10.4,0.440,0.42,1.50,0.145,34.0,48.0,0.99832,3.38,0.86,9.90,3
1374,6.8,0.815,0.00,1.20,0.267,16.0,29.0,0.99471,3.32,0.51,9.80,3
1469,7.3,0.980,0.05,2.10,0.061,20.0,49.0,0.99705,3.31,0.55,9.70,3
459,11.6,0.580,0.66,2.20,0.074,10.0,47.0,1.00080,3.25,0.57,9.00,3
517,10.4,0.610,0.49,2.10,0.200,5.0,16.0,0.99940,3.16,0.63,8.40,3


### Renaming, Dropping Columns

In [105]:
# Work on an independent copy. Plain assignment (df = wine_df) only creates a second name
# for the same object, so the column additions, renames and in-place drops below would
# also change wine_df.
df = wine_df.copy()

In [108]:
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [110]:
# Assigning to a new column label adds that column; the subtraction is element-wise (row by row).
df['non_free_sulfur']= df['total sulfur dioxide'] - df['free sulfur dioxide']
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality,non_free_sulfur
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,23.0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,42.0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,39.0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,43.0
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,23.0


In [113]:
# Maps old column names to new ones; inplace=True changes df itself instead of returning a renamed copy.
df.rename(columns={'total sulfur dioxide':'total_sulfur_dioxide', 'free sulfur dioxide':'free_sulfur_dioxide'}, inplace=True)
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,non_free_sulfur
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,23.0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,42.0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,39.0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,43.0
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,23.0


In [121]:
# eval parses the string as "new_column = expression" over column names; the names used here
# contain no spaces, so they can be written bare. inplace=True adds the column to df itself.
df.eval('non_free_sulfur_2 = total_sulfur_dioxide - free_sulfur_dioxide', inplace=True)
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,non_free_sulfur,non_free_sulfur_2
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,23.0,23.0
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,42.0,42.0
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,39.0,39.0
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,43.0,43.0
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,23.0,23.0


In [117]:
df.drop('non_free_sulfur_2', axis=1) #returns a NEW DataFrame without the column (axis 1 = columns); the result is not assigned, so df is unchanged
df.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality', 'non_free_sulfur',
       'non_free_sulfur_2'],
      dtype='object')

In [122]:
df.drop('non_free_sulfur_2', axis=1, inplace=True) #inplace=True removes column (i.e, axis 1) 'non_free_sulfur_2' from df itself
df.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality', 'non_free_sulfur'],
      dtype='object')

In [124]:
#create a copy of a df without certain columns
df2 = df.drop('non_free_sulfur', axis=1) #drop column (i.e, axis 1) 'non_free_sulfur'; df itself keeps the column
df2.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [None]:
#df.fillna
#df['column name'].fillna

#df.dropna
#df['column name'].dropna

### Joins

In [None]:
#df.join(other_df)  # joins on the index by default
#df.merge(other_df, how='left' | 'right' | 'outer' | 'inner', on='column name')  # merge has no axis argument
#pd.concat([df1, df2], join='inner' | 'outer', axis=0 or 1)  # axis=0 stacks rows, axis=1 places frames side by side

### Save to File

In [None]:
#df.to_csv()
#df.to_excel()
#df.to_... many more others html, etc.

### Pivot

In [None]:
import numpy as np
#pd.pivot_table(df, values='column name', index='column name', columns='column name', aggfunc=np.max)
#note: aggfunc is a pd.pivot_table argument; pd.pivot only reshapes and takes no aggfunc

### Plotting

In [None]:
# pandas plotting is built on matplotlib; the package is `matplotlib` (there is no `matplot.lib`).
import matplotlib.pyplot as plt

# Template: df.plot(kind='hist' | 'scatter' | 'box' | 'bar', x='column name', y='column name')
# `kind` takes exactly one plot type, and x / y must be existing column names.
df.plot(kind='scatter', x='alcohol', y='pH', title='pH vs. alcohol');