In [1]:
import pandas as pd

### 1. Create dataframe from list

In [2]:
lst = ['Swiggy', 'Zomato', 'Uber', 'Ola', 
            'Rapido', 'Blinkit', 'Amazon']

df = pd.DataFrame(lst)
df

Unnamed: 0,0
0,Swiggy
1,Zomato
2,Uber
3,Ola
4,Rapido
5,Blinkit
6,Amazon


### 2. Create dataframe from dictionary

In [3]:
details = {
    'Name' : ['Ankit', 'Aishwarya', 'Shaurya', 'Shivangi'],
    'Age' : [23, 21, 22, 21],
    'University' : ['IITM', 'NITK', 'VITC', 'MIT'],
}
  
df = pd.DataFrame(details)
  
df

Unnamed: 0,Name,Age,University
0,Ankit,23,IITM
1,Aishwarya,21,NITK
2,Shaurya,22,VITC
3,Shivangi,21,MIT


### 3. Create new column by concatenation of two existing column in a dataframe

In [4]:
technologies = ({
     'Courses':["Spark","PySpark","Hadoop","Python","pandas"],
     'Fee' :[20000,25000,26000,22000,24000],
     'Duration':['30days','40days','35days','40days','60days'],
     'Discount':[1000,1500,2500,2100,2000]
               })
df = pd.DataFrame(technologies)
print(df)

   Courses    Fee Duration  Discount
0    Spark  20000   30days      1000
1  PySpark  25000   40days      1500
2   Hadoop  26000   35days      2500
3   Python  22000   40days      2100
4   pandas  24000   60days      2000


In [5]:
df["Period"] = df['Courses'].astype(str) +"-"+ df["Duration"]
print(df)

   Courses    Fee Duration  Discount          Period
0    Spark  20000   30days      1000    Spark-30days
1  PySpark  25000   40days      1500  PySpark-40days
2   Hadoop  26000   35days      2500   Hadoop-35days
3   Python  22000   40days      2100   Python-40days
4   pandas  24000   60days      2000   pandas-60days


### 4. Drop column from dataframe

In [6]:
data = {
    'A': ['A1', 'A2', 'A3', 'A4', 'A5'],
    'B': ['B1', 'B2', 'B3', 'B4', 'B5'],
    'C': ['C1', 'C2', 'C3', 'C4', 'C5'],
    'D': ['D1', 'D2', 'D3', 'D4', 'D5'],
    'E': ['E1', 'E2', 'E3', 'E4', 'E5']}
  
df = pd.DataFrame(data)
  
df.drop(['A'], axis=1)

Unnamed: 0,B,C,D,E
0,B1,C1,D1,E1
1,B2,C2,D2,E2
2,B3,C3,D3,E3
3,B4,C4,D4,E4
4,B5,C5,D5,E5


### 5. Sort dataframe using multiple columns

In [7]:
technologies = ({
    'Courses':["Spark","Hadoop","pandas","Oracle","Java"],
    'Fee' :[20000,25000,26000,22000,20000],
    'Duration':['30days','35days','40days','50days','60days'],
    'Discount':[1000,2300,1500,1200,2500]
               })
df = pd.DataFrame(technologies, index = ['r1','r2','r3','r4','r0'])
print(df)

   Courses    Fee Duration  Discount
r1   Spark  20000   30days      1000
r2  Hadoop  25000   35days      2300
r3  pandas  26000   40days      1500
r4  Oracle  22000   50days      1200
r0    Java  20000   60days      2500


In [8]:
df2 = df.sort_values(['Fee', 'Discount'])
print(df2)

   Courses    Fee Duration  Discount
r1   Spark  20000   30days      1000
r0    Java  20000   60days      2500
r4  Oracle  22000   50days      1200
r2  Hadoop  25000   35days      2300
r3  pandas  26000   40days      1500


### 6. Create custom index (use existing column as the custome index) instead of default index in dataframe

In [14]:
data = pd.read_csv('train.csv')
data.set_index('Name', inplace = True)
data

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Braund, Mr. Owen Harris",1,0,3,male,22.0,1,0,A/5 21171,7.2500,,S
"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",2,1,1,female,38.0,1,0,PC 17599,71.2833,C85,C
"Heikkinen, Miss. Laina",3,1,3,female,26.0,0,0,STON/O2. 3101282,7.9250,,S
"Futrelle, Mrs. Jacques Heath (Lily May Peel)",4,1,1,female,35.0,1,0,113803,53.1000,C123,S
"Allen, Mr. William Henry",5,0,3,male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...
"Montvila, Rev. Juozas",887,0,2,male,27.0,0,0,211536,13.0000,,S
"Graham, Miss. Margaret Edith",888,1,1,female,19.0,0,0,112053,30.0000,B42,S
"Johnston, Miss. Catherine Helen ""Carrie""",889,0,3,female,,1,2,W./C. 6607,23.4500,,S
"Behr, Mr. Karl Howell",890,1,1,male,26.0,0,0,111369,30.0000,C148,C


### 7. Apply user defined function on dataframe (using lambda)

In [16]:
values= [['Rohan',455],['Elvish',250],['Deepak',495],
         ['Soni',400],['Radhika',350],['Vansh',450]]
 
df = pd.DataFrame(values,columns=['Name','Total_Marks'])

df = df.assign(Percentage = lambda x: (x['Total_Marks'] /500 * 100))
df

Unnamed: 0,Name,Total_Marks,Percentage
0,Rohan,455,91.0
1,Elvish,250,50.0
2,Deepak,495,99.0
3,Soni,400,80.0
4,Radhika,350,70.0
5,Vansh,450,90.0


### 8. Write a Pandas program to read rows 0, 5, 7 and all columns in a dataframe

In [18]:
data = pd.read_csv('train.csv')
df = pd.DataFrame(data)
display(df.iloc[0])
print()
display(df.iloc[5])
print()
display(df.iloc[7])

PassengerId                          1
Survived                             0
Pclass                               3
Name           Braund, Mr. Owen Harris
Sex                               male
Age                               22.0
SibSp                                1
Parch                                0
Ticket                       A/5 21171
Fare                              7.25
Cabin                              NaN
Embarked                             S
Name: 0, dtype: object




PassengerId                   6
Survived                      0
Pclass                        3
Name           Moran, Mr. James
Sex                        male
Age                         NaN
SibSp                         0
Parch                         0
Ticket                   330877
Fare                     8.4583
Cabin                       NaN
Embarked                      Q
Name: 5, dtype: object




PassengerId                                 8
Survived                                    0
Pclass                                      3
Name           Palsson, Master. Gosta Leonard
Sex                                      male
Age                                       2.0
SibSp                                       3
Parch                                       1
Ticket                                 349909
Fare                                   21.075
Cabin                                     NaN
Embarked                                    S
Name: 7, dtype: object

### 9. Write a Pandas program to read rows in positions 0 and 1, columns in positions 0 and 3 in dataframe

In [22]:
data = pd.read_csv('train.csv')
df = pd.DataFrame(data)
display(df.iloc[0])
print()
display(df.iloc[1])
print()
print(df[['PassengerId', 'Name']])

PassengerId                          1
Survived                             0
Pclass                               3
Name           Braund, Mr. Owen Harris
Sex                               male
Age                               22.0
SibSp                                1
Parch                                0
Ticket                       A/5 21171
Fare                              7.25
Cabin                              NaN
Embarked                             S
Name: 0, dtype: object




PassengerId                                                    2
Survived                                                       1
Pclass                                                         1
Name           Cumings, Mrs. John Bradley (Florence Briggs Th...
Sex                                                       female
Age                                                         38.0
SibSp                                                          1
Parch                                                          0
Ticket                                                  PC 17599
Fare                                                     71.2833
Cabin                                                        C85
Embarked                                                       C
Name: 1, dtype: object


     PassengerId                                               Name
0              1                            Braund, Mr. Owen Harris
1              2  Cumings, Mrs. John Bradley (Florence Briggs Th...
2              3                             Heikkinen, Miss. Laina
3              4       Futrelle, Mrs. Jacques Heath (Lily May Peel)
4              5                           Allen, Mr. William Henry
..           ...                                                ...
886          887                              Montvila, Rev. Juozas
887          888                       Graham, Miss. Margaret Edith
888          889           Johnston, Miss. Catherine Helen "Carrie"
889          890                              Behr, Mr. Karl Howell
890          891                                Dooley, Mr. Patrick

[891 rows x 2 columns]


### 10. Write a Pandas program to count the duplicate rows of dataframe

In [26]:
data = pd.read_csv('train.csv')
df = pd.DataFrame(data)
print(df.duplicated().sum())

0


### 11. Remove duplicate rows from dataframe

In [28]:
df.drop_duplicates()
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


### 12. Write a Pandas program to rename two of the columns of the dataframe.

In [29]:
df.rename(columns = {'PassengerId':'Pid'}, inplace = True)
df.rename(columns = {'Fare':'Total Cost'}, inplace = True)
df

Unnamed: 0,Pid,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Total Cost,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C
