In [6]:
import pandas as pd
import numpy as np

# 1.Create a DataFrame from a dictionary 

In [5]:
data = {'grammer': ['Python', 'C', 'Java', 'GO', np.nan, 'SQL', 'PHP', 'Python'], 
        'score':[1, 2, np.nan, 4, 5, 6, 7, 10]}

In [10]:
df = pd.DataFrame(data)
df

Unnamed: 0,grammer,score
0,Python,1.0
1,C,2.0
2,Java,
3,GO,4.0
4,,5.0
5,SQL,6.0
6,PHP,7.0
7,Python,10.0


# 2. Get the rows have string 'Python'

In [7]:
df['grammer'] == 'Python'

0     True
1    False
2    False
3    False
4    False
5    False
6    False
7     True
Name: grammer, dtype: bool

In [8]:
df[df['grammer'] == 'Python']

Unnamed: 0,grammer,score
0,Python,1.0
7,Python,10.0


In [12]:
df['grammer'].str.contains('Python')

0     True
1    False
2    False
3    False
4      NaN
5    False
6    False
7     True
Name: grammer, dtype: object

In [14]:
results = df['grammer'].str.contains("Python")
results.fillna(value=False,inplace = True)
df[results]

Unnamed: 0,grammer,score
0,Python,1.0
7,Python,10.0


# 3. Get all the column names

In [15]:
print(df.columns)

Index(['grammer', 'score'], dtype='object')


# 4. Change the column name of the seconf column to 'popularity'

In [17]:
df.rename(columns = {'score': 'popularity'}, inplace = True)
df

Unnamed: 0,grammer,popularity
0,Python,1.0
1,C,2.0
2,Java,
3,GO,4.0
4,,5.0
5,SQL,6.0
6,PHP,7.0
7,Python,10.0


In [26]:
df.columns = ['grammer', 'popularity']
df

Unnamed: 0,grammer,popularity
0,Python,1.0
1,C,2.0
2,Java,
3,GO,4.0
4,,5.0
5,SQL,6.0
6,PHP,7.0
7,Python,10.0


### *5. Count the number of times of occurence for each grammer

In [27]:
df['grammer'].value_counts()

Python    2
SQL       1
Java      1
GO        1
PHP       1
C         1
Name: grammer, dtype: int64

### *6. Filling in NaN by interpolation of up and down neighbours

In [32]:
interpolation = df['popularity'].interpolate()
interpolation

0     1.0
1     2.0
2     3.0
3     4.0
4     5.0
5     6.0
6     7.0
7    10.0
Name: popularity, dtype: float64

In [33]:
df['popularity'] = df['popularity'].fillna(interpolation)
df

Unnamed: 0,grammer,popularity
0,Python,1.0
1,C,2.0
2,Java,3.0
3,GO,4.0
4,,5.0
5,SQL,6.0
6,PHP,7.0
7,Python,10.0


### 7. Select rows with popularity > 3

In [35]:
df[df['popularity']>3]

Unnamed: 0,grammer,popularity
3,GO,4.0
4,,5.0
5,SQL,6.0
6,PHP,7.0
7,Python,10.0


### 8. Remove the duplicates in column 'grammer'

In [36]:
df.drop_duplicates(['grammer'])

Unnamed: 0,grammer,popularity
0,Python,1.0
1,C,2.0
2,Java,3.0
3,GO,4.0
4,,5.0
5,SQL,6.0
6,PHP,7.0


In [37]:
### 9. Calculate the average of column 'popularity'

In [38]:
df.popularity.mean()

4.75

In [39]:
### 10. Change column 'grammer' to a list

In [40]:
df.popularity.to_list()

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 10.0]

In [41]:
### 11. Check the dimension of the DataFrame

In [43]:
df.shape

(8, 2)

### *12. Select the rows with value of popularity >3 and <7 

In [50]:
df[(df.popularity>3) & (df.popularity<7)]

Unnamed: 0,grammer,popularity
3,GO,4.0
4,,5.0
5,SQL,6.0


In [51]:
### 13. Save DF as Excel

In [52]:
df.to_excel('test.xlsx')

In [53]:
### *14. Exchange the positions of 2 columns

In [54]:
cols = df.columns[[1, 0]]
df = df[cols]
df

Unnamed: 0,popularity,grammer
0,1.0,Python
1,2.0,C
2,3.0,Java
3,4.0,GO
4,5.0,
5,6.0,SQL
6,7.0,PHP
7,10.0,Python


In [55]:
### 15. Get the row where the biggest popularity locates

In [56]:
df[df['popularity'] == df['popularity'].max()]

Unnamed: 0,popularity,grammer
7,10.0,Python


In [57]:
### *16. Get the last 5 rows

In [58]:
df.tail()

Unnamed: 0,popularity,grammer
3,4.0,GO
4,5.0,
5,6.0,SQL
6,7.0,PHP
7,10.0,Python


In [59]:
### *17.Delete the last row

In [62]:
df.drop([len(df)-1], inplace = True)
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,popularity,grammer
0,1.0,Python
1,2.0,C
2,3.0,Java
3,4.0,GO
4,5.0,
5,6.0,SQL


In [63]:
### *18.Insert a row

In [64]:
row={'grammer':'Perl','popularity':6.6}
df = df.append(row,ignore_index=True)
df

Unnamed: 0,popularity,grammer
0,1.0,Python
1,2.0,C
2,3.0,Java
3,4.0,GO
4,5.0,
5,6.0,SQL
6,6.6,Perl


In [65]:
### 19.Sort by popularity

In [66]:
df.sort_values("popularity",inplace=True)
df

Unnamed: 0,popularity,grammer
0,1.0,Python
1,2.0,C
2,3.0,Java
3,4.0,GO
4,5.0,
5,6.0,SQL
6,6.6,Perl


In [67]:
### *20.Calculate the length of each string in column 'grammer'

In [69]:
df['grammer'] = df['grammer'].fillna('R')
df['len_str'] = df['grammer'].map(lambda x: len(x))
df

Unnamed: 0,popularity,grammer,len_str
0,1.0,Python,6
1,2.0,C,1
2,3.0,Java,4
3,4.0,GO,2
4,5.0,R,1
5,6.0,SQL,3
6,6.6,Perl,4


In [None]:
### 21.