# Pandas Cheatsheet

A cheatsheet for the Pandas Package



In [1]:
# Importing packages
import pandas as pd

## 1 - Input / Output Functions

### 1.1  `read_csv`

In [59]:
# Importing Data for samples
df_user = pd.read_csv('./data/df_user.csv')

df_posts = pd.read_csv('./data/df_posts.csv')

df_group = pd.read_csv("./data/df_group.csv")

## 2 - Data Manipulation Functions





### 2.1 `unique`

In [3]:
df_user['first_name'].unique()

array(['Katrina', 'Fay', 'Hayden', 'John', 'James', 'Jim', 'Sarah',
       'Pankaj', 'Tim', 'Taylor'], dtype=object)

### 2.2 `head`

- Used for diplaying the top n rows


In [4]:
df_user.head()

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
1,2,Fay,Powell,India,41
2,3,Hayden,Holla,Mexico,28
3,4,John,Fleming,Nigeria,70
4,5,James,Ramsey,Canada,23


In [5]:
# Specifying the number of rows to display
df_user.head(2)

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
1,2,Fay,Powell,India,41


### 2.3 `fillna()`

### 2.4 `isna()`

# 3 - SQL Operations

Reference - https://levelup.gitconnected.com/sql-v-pandas-basic-syntax-comparison-cheat-sheet-498289372d45

### 3.1 SELECT 

Selecting a columns from a dataframe

#### `SELECT first_name, last_name from df_user`

In [6]:
df_user[['first_name', 'last_name']]

Unnamed: 0,first_name,last_name
0,Katrina,Smith
1,Fay,Powell
2,Hayden,Holla
3,John,Fleming
4,James,Ramsey
5,Jim,Rand
6,Sarah,Tanner
7,Pankaj,Singh
8,Tim,Cole
9,Taylor,Oliver


#### 3.1.1 Select using `loc`

- `loc` allows to pull by index + Column name


In [7]:
df_user.loc[1,'last_name']

'Powell'

In [8]:
# Variations - 

# Returning the row in the form of a list
df_user.loc[1].to_list()

[2, 'Fay', 'Powell', 'India', 41]

#### 3.1.2 Select using `at` and `iat`

- With `at` we can use row index + column name
    - Only only 1 column name can be provided 

In [9]:
df_user.at[3, 'age']


70

- With `iat` we can use both row + column index

In [10]:
df_user.iat[1,3]

'India'

### 3.2 DISTINCT 

#### `SELECT DISTINCT FIRSt_NAME FROM DF_USER`

In [11]:
# Using unique
list(df_user['first_name'].unique())

['Katrina',
 'Fay',
 'Hayden',
 'John',
 'James',
 'Jim',
 'Sarah',
 'Pankaj',
 'Tim',
 'Taylor']

In [12]:
# Using duplicated()

df_user['first_name'][~df_user['first_name'].duplicated()]

0    Katrina
1        Fay
2     Hayden
3       John
4      James
5        Jim
6      Sarah
7     Pankaj
8        Tim
9     Taylor
Name: first_name, dtype: object

### 3.3 WHERE

#### `SELECT * FROM df_user WHERE first_name = 'Fay'`

In [13]:
df_user[df_user['first_name'] == 'Fay']

Unnamed: 0,user_id,first_name,last_name,country,age
1,2,Fay,Powell,India,41


In [14]:
df_user[df_user.first_name == 'Fay']

Unnamed: 0,user_id,first_name,last_name,country,age
1,2,Fay,Powell,India,41


#### `SELECT first_name, last_name FROM df_user`

In [15]:
df_user[['first_name', 'last_name']][df_user.first_name == 'Fay']

Unnamed: 0,first_name,last_name
1,Fay,Powell


#### `SELECT * FROM df_user WHERE first_name != 'Fay'`

In [16]:
df_user[df_user.first_name != 'Fay'].head(5)

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
2,3,Hayden,Holla,Mexico,28
3,4,John,Fleming,Nigeria,70
4,5,James,Ramsey,Canada,23
5,6,Jim,Rand,USA,18


In [17]:
df_user[~(df_user.first_name == 'Fay')].head(5)

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
2,3,Hayden,Holla,Mexico,28
3,4,John,Fleming,Nigeria,70
4,5,James,Ramsey,Canada,23
5,6,Jim,Rand,USA,18


#### 3.3.1 AND / OR 

- AND - `and` , `&` 
- OR  - `or` , `|`

#### `SELECT * FROM DF_USER WHERE COUNTRY = 'USA' AND AGE < 30`

In [18]:
df_user[(df_user.country == 'USA') & (df_user.age < 30)]
# Use brackets or else will face errors

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
5,6,Jim,Rand,USA,18


#### `SELECT * FROM DF_USER WHERE COUNTRY = 'USA' OR AGE < 30`

In [19]:
df_user[(df_user.country == 'USA') | (df_user.age < 30)]

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
2,3,Hayden,Holla,Mexico,28
4,5,James,Ramsey,Canada,23
5,6,Jim,Rand,USA,18
7,8,Pankaj,Singh,India,18
8,9,Tim,Cole,Sweden,29


#### 3.3.2 IN

- IN - `columnname.isin()`

    - `SELECT * FROM DF_USER WHERE COUNTRY IN ('USA', 'India')`

In [20]:
df_user[df_user.country.isin(['USA', 'India'])]

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
1,2,Fay,Powell,India,41
5,6,Jim,Rand,USA,18
7,8,Pankaj,Singh,India,18
9,10,Taylor,Oliver,India,65


#### 3.3.3 LIKE

- LIKE - `columnname.str.contains("text")`

    - `SELECT * FROM DF_USER WHERE FIRST_NAME LIKE '%an%'` 

In [21]:
df_user[df_user.first_name.str.contains("an")]

Unnamed: 0,user_id,first_name,last_name,country,age
7,8,Pankaj,Singh,India,18


### 3.4 ORDER BY


- ORDER BY - `sort_values()`

    - `SELECT * FROM DF_USER ORDER BY FIRST_NAME`

In [22]:
df_user.sort_values('first_name').head(5)

Unnamed: 0,user_id,first_name,last_name,country,age
1,2,Fay,Powell,India,41
2,3,Hayden,Holla,Mexico,28
4,5,James,Ramsey,Canada,23
5,6,Jim,Rand,USA,18
3,4,John,Fleming,Nigeria,70


- ORDER BY 2 or more columns

    - `SELECT * FROM DF_USER ORDER BY FIRST_NAME, AGE`

In [23]:
df_user.sort_values(['first_name', 'age']).head(5)
#The sorting happens based on the item first in the list 

Unnamed: 0,user_id,first_name,last_name,country,age
1,2,Fay,Powell,India,41
2,3,Hayden,Holla,Mexico,28
4,5,James,Ramsey,Canada,23
5,6,Jim,Rand,USA,18
3,4,John,Fleming,Nigeria,70


- ORDER ASC / DESC

    - `sort_values([], ascending = [] )`

In [24]:
df_user.sort_values(['first_name', 'age'], ascending= [False, True])
    # The sorting can be set for each column separately


Unnamed: 0,user_id,first_name,last_name,country,age
8,9,Tim,Cole,Sweden,29
9,10,Taylor,Oliver,India,65
6,7,Sarah,Tanner,England,54
7,8,Pankaj,Singh,India,18
0,1,Katrina,Smith,USA,28
3,4,John,Fleming,Nigeria,70
5,6,Jim,Rand,USA,18
4,5,James,Ramsey,Canada,23
2,3,Hayden,Holla,Mexico,28
1,2,Fay,Powell,India,41


### 3.5 GROUP BY 

- Exploring operations - `COUNT, SUM, AVG, RANK, RANK + PARTITION BY, DENSE`

##### 3.5.1 COUNT 

- `SELECT COUNT(1) AS COUNT FROM df_user GROUP_BY country SORT BY COUNT DESC`

In [25]:
df_user.groupby("country").agg({'first_name':'count'}).rename(columns={'first_name': 'count'}).sort_values('count', ascending=False)

Unnamed: 0_level_0,count
country,Unnamed: 1_level_1
India,3
USA,2
Canada,1
England,1
Mexico,1
Nigeria,1
Sweden,1


#### 3.5.2 SUM

- `SELECT SUM(age) FROM df_user GROUP BY country`

In [26]:
df_user.groupby("country").agg({"age":"sum"}).sort_values('age', ascending=False)

Unnamed: 0_level_0,age
country,Unnamed: 1_level_1
India,124
Nigeria,70
England,54
USA,46
Sweden,29
Mexico,28
Canada,23


#### 3.5.3 AVG
- `SELECT AVG(age) FROM df_user GROUP BY country`

In [27]:
df_user.groupby("country").agg({'age':'mean'}).rename(columns={'age':'avg_age'}).sort_values('avg_age', ascending = False)

Unnamed: 0_level_0,avg_age
country,Unnamed: 1_level_1
Nigeria,70.0
England,54.0
India,41.333333
Sweden,29.0
Mexico,28.0
Canada,23.0
USA,23.0


#### 3.5.4 RANK

- `SELECT *, RANK() OVER (ORDER BY age DESC) FROM df_user`

In [28]:
df_user_rank = df_user.copy(deep= True)

df_user_rank['age_rank'] = df_user_rank.age.rank(ascending=False)

df_user_rank.sort_values('age_rank', ascending=False)

Unnamed: 0,user_id,first_name,last_name,country,age,age_rank
5,6,Jim,Rand,USA,18,9.5
7,8,Pankaj,Singh,India,18,9.5
4,5,James,Ramsey,Canada,23,8.0
0,1,Katrina,Smith,USA,28,6.5
2,3,Hayden,Holla,Mexico,28,6.5
8,9,Tim,Cole,Sweden,29,5.0
1,2,Fay,Powell,India,41,4.0
6,7,Sarah,Tanner,England,54,3.0
9,10,Taylor,Oliver,India,65,2.0
3,4,John,Fleming,Nigeria,70,1.0


#### 3.5.5 RANK + PARTITION BY 

- `SELECT *, RANK() OVER (PARTITION BY country ORDER BY age DESC) FROM df_user`

In [29]:
df_user_rank["age_rank_part"] = df_user_rank.groupby('country').age.rank(ascending = True)
df_user_rank.sort_values(['country','age_rank_part'], ascending = [True, True])

Unnamed: 0,user_id,first_name,last_name,country,age,age_rank,age_rank_part
4,5,James,Ramsey,Canada,23,8.0,1.0
6,7,Sarah,Tanner,England,54,3.0,1.0
7,8,Pankaj,Singh,India,18,9.5,1.0
1,2,Fay,Powell,India,41,4.0,2.0
9,10,Taylor,Oliver,India,65,2.0,3.0
2,3,Hayden,Holla,Mexico,28,6.5,1.0
3,4,John,Fleming,Nigeria,70,1.0,1.0
8,9,Tim,Cole,Sweden,29,5.0,1.0
5,6,Jim,Rand,USA,18,9.5,1.0
0,1,Katrina,Smith,USA,28,6.5,2.0


#### 3.5.6 DENSE RANK

- `SELECT *, DENSE_RANK() OVER (ORDER BY age DESC ) FROM df_user`

In [30]:
df_user_rank['age_rank_dense'] = df_user_rank.age.rank(method = 'dense', ascending= True)
df_user_rank.sort_values('age_rank_dense', ascending = True)


Unnamed: 0,user_id,first_name,last_name,country,age,age_rank,age_rank_part,age_rank_dense
5,6,Jim,Rand,USA,18,9.5,1.0,1.0
7,8,Pankaj,Singh,India,18,9.5,1.0,1.0
4,5,James,Ramsey,Canada,23,8.0,1.0,2.0
0,1,Katrina,Smith,USA,28,6.5,2.0,3.0
2,3,Hayden,Holla,Mexico,28,6.5,1.0,3.0
8,9,Tim,Cole,Sweden,29,5.0,1.0,4.0
1,2,Fay,Powell,India,41,4.0,2.0,5.0
6,7,Sarah,Tanner,England,54,3.0,1.0,6.0
9,10,Taylor,Oliver,India,65,2.0,3.0,7.0
3,4,John,Fleming,Nigeria,70,1.0,1.0,8.0


### 3.6 UPDATE

- `UPDATE df_user SET age = 35 WHERE age = 41`

##### 3.6.1 Using `loc`

In [31]:
df_user.loc[df_user.age == 41, "age"] = 35
df_user

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
1,2,Fay,Powell,India,35
2,3,Hayden,Holla,Mexico,28
3,4,John,Fleming,Nigeria,70
4,5,James,Ramsey,Canada,23
5,6,Jim,Rand,USA,18
6,7,Sarah,Tanner,England,54
7,8,Pankaj,Singh,India,18
8,9,Tim,Cole,Sweden,29
9,10,Taylor,Oliver,India,65


#### 3.6.2 Using `at`

- `at` works using the row index 

In [32]:
#Using at display
df_user.at[1,'age']

35

In [33]:
df_user.at[1,'age'] = 42
df_user

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,28
1,2,Fay,Powell,India,42
2,3,Hayden,Holla,Mexico,28
3,4,John,Fleming,Nigeria,70
4,5,James,Ramsey,Canada,23
5,6,Jim,Rand,USA,18
6,7,Sarah,Tanner,England,54
7,8,Pankaj,Singh,India,18
8,9,Tim,Cole,Sweden,29
9,10,Taylor,Oliver,India,65


#### 3.6.3 Using `iat`

- With `iat` we can specify the index for both rows and columns 

In [34]:
df_user.iat[0,4] = 32
df_user

Unnamed: 0,user_id,first_name,last_name,country,age
0,1,Katrina,Smith,USA,32
1,2,Fay,Powell,India,42
2,3,Hayden,Holla,Mexico,28
3,4,John,Fleming,Nigeria,70
4,5,James,Ramsey,Canada,23
5,6,Jim,Rand,USA,18
6,7,Sarah,Tanner,England,54
7,8,Pankaj,Singh,India,18
8,9,Tim,Cole,Sweden,29
9,10,Taylor,Oliver,India,65


### 3.7 JOIN

- Reference - https://levelup.gitconnected.com/sql-v-pandas-join-57642dc3ce76


#### 3.7.1 INNER JOIN

- Pulling the rows only which satisfied the join criteria
- ` SELECT P.*, U.FIRST_NAME, U.LAST_NAME, U.COUNTRY`
  ` FROM DF_USER U`
  ` INNER JOIN ON DF_POST P` 
  ` ON U.USER_ID = P.POST_USER_ID`

In [44]:
df_post_details = df_posts.merge(df_user[["user_id","first_name", "last_name", "country"]], left_on='post_user_id', right_on = 'user_id', how = 'inner')
df_post_details.sort_values('post_id', ascending=True).head(10)

Unnamed: 0,post_id,post_user_id,post_text,post_keywords,post_date,post_views,user_id,first_name,last_name,country
0,0,2,The Lakers game from last night was great.,"basketball , lakers , nba",4/14/2021,63,2,Fay,Powell,India
3,1,1,Lebron James is top class.,"basketball , lebron_james , nba",2/18/2019,7,1,Katrina,Smith,USA
1,2,2,Asparagus tastes OK.,"asparagus , food",3/6/2020,23,2,Fay,Powell,India
4,3,1,Spaghetti is an Italian food.,"spaghetti , food",8/24/2019,33,1,Katrina,Smith,USA
6,4,3,User 3 is not sharing interests,#spam#,4/4/2020,98,3,Hayden,Holla,Mexico
9,5,5,Sachin ig a cricket god,cricket,11/18/2019,72,5,James,Ramsey,Canada
10,6,5,She gave him a piece of paper.,person,12/23/2019,64,5,James,Ramsey,Canada
12,7,8,The game became exciting.,game,1/31/2021,42,8,Pankaj,Singh,India
14,8,9,He's really selfish.,personal behaviour,6/4/2020,47,9,Tim,Cole,Sweden
7,9,3,She was already in love with him.,personal behaviour,12/27/2019,55,3,Hayden,Holla,Mexico


In [61]:
# JOIN WITH 2 columns with suffixes

    # The Suffixes are attached to columns which are present in both the tables with the same name but are not part of the join
    # In the example below we can see post_views being present in each table
df_post_grp = df_posts.merge(df_group, left_on=['post_id','post_user_id'], right_on=['post_id', 'post_user_id'], how='inner',suffixes=["_pst", "_grp"])
df_post_grp




Unnamed: 0,post_id,post_user_id,post_text,post_keywords,post_date,post_views_pst,group_id,group_name,post_views_grp
0,0,2,The Lakers game from last night was great.,"basketball , lakers , nba",4/14/2021,63,1,Travelers,10
1,1,1,Lebron James is top class.,"basketball , lebron_james , nba",2/18/2019,7,2,Watches,100
2,2,2,Asparagus tastes OK.,"asparagus , food",3/6/2020,23,3,Photography,4
3,3,1,Spaghetti is an Italian food.,"spaghetti , food",8/24/2019,33,4,Exercise,5
4,4,3,User 3 is not sharing interests,#spam#,4/4/2020,98,1,Travelers,2000
5,5,5,Sachin ig a cricket god,cricket,11/18/2019,72,2,Watches,32


#### 3.7.2 LEFT/RIGHT OUTER JOIN

- Pulling all of the records in the LEFT/RIGHT (depending on LEFT/JOIN join respectively) along with the joined data from the RIGHT/LEFT table respectively

In [63]:
df_user_group = df_user.merge(df_group, left_on='user_id', right_on='post_user_id', how='left')
df_user_group

Unnamed: 0,user_id,first_name,last_name,country,age,group_id,group_name,post_id,post_user_id,post_views
0,1,Katrina,Smith,USA,28,2.0,Watches,1.0,1.0,100.0
1,1,Katrina,Smith,USA,28,4.0,Exercise,3.0,1.0,5.0
2,2,Fay,Powell,India,41,1.0,Travelers,0.0,2.0,10.0
3,2,Fay,Powell,India,41,3.0,Photography,2.0,2.0,4.0
4,3,Hayden,Holla,Mexico,28,1.0,Travelers,4.0,3.0,2000.0
5,4,John,Fleming,Nigeria,70,,,,,
6,5,James,Ramsey,Canada,23,2.0,Watches,5.0,5.0,32.0
7,6,Jim,Rand,USA,18,,,,,
8,7,Sarah,Tanner,England,54,,,,,
9,8,Pankaj,Singh,India,18,,,,,


#### 3.7.3 FULL OUTER JOIN

- Pulling all of the records, even the ones which did not join. 

In [67]:
df_group_post_complete = df_group.merge(df_posts,left_on=['post_id','post_user_id'], right_on=['post_id', 'post_user_id'], how='outer',suffixes=["_pst", "_grp"])
df_group_post_complete

Unnamed: 0,group_id,group_name,post_id,post_user_id,post_views_pst,post_text,post_keywords,post_date,post_views_grp
0,1.0,Travelers,0,2,10.0,The Lakers game from last night was great.,"basketball , lakers , nba",4/14/2021,63
1,2.0,Watches,1,1,100.0,Lebron James is top class.,"basketball , lebron_james , nba",2/18/2019,7
2,3.0,Photography,2,2,4.0,Asparagus tastes OK.,"asparagus , food",3/6/2020,23
3,4.0,Exercise,3,1,5.0,Spaghetti is an Italian food.,"spaghetti , food",8/24/2019,33
4,1.0,Travelers,4,3,2000.0,User 3 is not sharing interests,#spam#,4/4/2020,98
5,2.0,Watches,5,5,32.0,Sachin ig a cricket god,cricket,11/18/2019,72
6,,,6,5,,She gave him a piece of paper.,person,12/23/2019,64
7,,,7,8,,The game became exciting.,game,1/31/2021,42
8,,,8,9,,He's really selfish.,personal behaviour,6/4/2020,47
9,,,9,3,,She was already in love with him.,personal behaviour,12/27/2019,55


### 3.8 UNION

- Appending 2 or more dataframes 

#### 3.8.1 using `append`

In [69]:
df_post_usa = df_post_details[df_post_details.country == "USA"]
df_post_ind = df_post_details[df_post_details.country == "India"]

df_post_us_ind = df_post_usa.append(df_post_ind)
df_post_us_ind

Unnamed: 0,post_id,post_user_id,post_text,post_keywords,post_date,post_views,user_id,first_name,last_name,country
3,1,1,Lebron James is top class.,"basketball , lebron_james , nba",2/18/2019,7,1,Katrina,Smith,USA
4,3,1,Spaghetti is an Italian food.,"spaghetti , food",8/24/2019,33,1,Katrina,Smith,USA
5,18,1,What's Ken doing now?,personal interest,3/23/2021,37,1,Katrina,Smith,USA
19,13,6,He had a bitter experience.,abuse,12/24/2020,49,6,Jim,Rand,USA
0,0,2,The Lakers game from last night was great.,"basketball , lakers , nba",4/14/2021,63,2,Fay,Powell,India
1,2,2,Asparagus tastes OK.,"asparagus , food",3/6/2020,23,2,Fay,Powell,India
2,19,2,You can't turn around here. I think you should...,"travel , cars",3/17/2021,2,2,Fay,Powell,India
12,7,8,The game became exciting.,game,1/31/2021,42,8,Pankaj,Singh,India
13,15,8,They agreed to start early.,work,4/28/2020,59,8,Pankaj,Singh,India
16,10,10,She didn't intend to let him kiss her.,abuse,4/6/2020,78,10,Taylor,Oliver,India


#### 3.8.2 Using `concat`

In [73]:
df_post_us_ind_2 = pd.concat([df_post_ind,df_post_usa])
df_post_us_ind_2

Unnamed: 0,post_id,post_user_id,post_text,post_keywords,post_date,post_views,user_id,first_name,last_name,country
0,0,2,The Lakers game from last night was great.,"basketball , lakers , nba",4/14/2021,63,2,Fay,Powell,India
1,2,2,Asparagus tastes OK.,"asparagus , food",3/6/2020,23,2,Fay,Powell,India
2,19,2,You can't turn around here. I think you should...,"travel , cars",3/17/2021,2,2,Fay,Powell,India
12,7,8,The game became exciting.,game,1/31/2021,42,8,Pankaj,Singh,India
13,15,8,They agreed to start early.,work,4/28/2020,59,8,Pankaj,Singh,India
16,10,10,She didn't intend to let him kiss her.,abuse,4/6/2020,78,10,Taylor,Oliver,India
17,17,10,Which air conditioner do you think is the most...,electronics,2/9/2020,52,10,Taylor,Oliver,India
3,1,1,Lebron James is top class.,"basketball , lebron_james , nba",2/18/2019,7,1,Katrina,Smith,USA
4,3,1,Spaghetti is an Italian food.,"spaghetti , food",8/24/2019,33,1,Katrina,Smith,USA
5,18,1,What's Ken doing now?,personal interest,3/23/2021,37,1,Katrina,Smith,USA


## 4 - Dataframe Summarization 

https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf

In [39]:
dict(df_user['country'].value_counts())

{'India': 3,
 'USA': 2,
 'Mexico': 1,
 'Nigeria': 1,
 'Canada': 1,
 'England': 1,
 'Sweden': 1}

In [35]:
#Archive

'''
from sklearn.datasets import load_iris, load_diabetes, load_digits, load_linnerud, load_wine, load_breast_cancer, load_boston
df_iris = load_iris()
df_iris = pd.DataFrame(df_iris.data, columns= df_iris.feature_names)

df_diabetes = load_diabetes()
df_diabetes = pd.DataFrame(df_diabetes.data, columns= df_diabetes.feature_names)

df_digits = load_digits()
df_digits = pd.DataFrame(df_digits.data, columns= df_digits.feature_names)

df_wine = load_wine()
df_wine = pd.DataFrame(df_wine.data, columns= df_wine.feature_names)

df_linn = load_linnerud()
df_linn = pd.DataFrame(df_linn.data, columns= df_linn.feature_names)

df_cancer = load_breast_cancer()
df_cancer = pd.DataFrame(df_cancer.data, columns= df_cancer.feature_names)

df_boston = load_boston()
df_boston = pd.DataFrame(df_boston.data, columns= df_boston.feature_names)
'''

'\nfrom sklearn.datasets import load_iris, load_diabetes, load_digits, load_linnerud, load_wine, load_breast_cancer, load_boston\ndf_iris = load_iris()\ndf_iris = pd.DataFrame(df_iris.data, columns= df_iris.feature_names)\n\ndf_diabetes = load_diabetes()\ndf_diabetes = pd.DataFrame(df_diabetes.data, columns= df_diabetes.feature_names)\n\ndf_digits = load_digits()\ndf_digits = pd.DataFrame(df_digits.data, columns= df_digits.feature_names)\n\ndf_wine = load_wine()\ndf_wine = pd.DataFrame(df_wine.data, columns= df_wine.feature_names)\n\ndf_linn = load_linnerud()\ndf_linn = pd.DataFrame(df_linn.data, columns= df_linn.feature_names)\n\ndf_cancer = load_breast_cancer()\ndf_cancer = pd.DataFrame(df_cancer.data, columns= df_cancer.feature_names)\n\ndf_boston = load_boston()\ndf_boston = pd.DataFrame(df_boston.data, columns= df_boston.feature_names)\n'