In [None]:
!pip install xlrd
!pip install openpyxl

In [1]:
import pandas as pd

## Modules

In [2]:
%%writefile greeter.py

def greet(name):
    """Print a one-line greeting addressed to ``name``."""
    greeting = f"Hello {name}!"
    print(greeting)

Writing greeter.py


In [3]:
import greeter

greeter.greet("Real Python")

Hello Real Python!


## Packages

In [4]:
!mkdir -p helpers

In [5]:
!touch helpers/__init__.py

In [6]:
%%writefile helpers/string.py
def shout(string):
    """Return ``string`` converted to upper case."""
    loud = string.upper()
    return loud

Writing string.py


In [7]:
%%writefile helpers/math.py
def area(length, width):
    """Return the area of a rectangle: ``length`` multiplied by ``width``."""
    rectangle_area = length * width
    return rectangle_area

Writing math.py


In [9]:
from helpers.string import shout
from helpers.math import area

# Dimensions of the example rectangle.
length, width = 5, 8

# Describe the rectangle in a sentence; the area comes from the package helper.
message = f"The area of a {length}-by-{width} rectangle is {area(length, width)}"

# Print the sentence in upper case.
print(shout(message))

THE AREA OF A 5-BY-8 RECTANGLE IS 40


## Pandas

Let’s get started!

Real-world data is messy (it contains null values, noise, missing values, etc.), and in most cases, before we start building an ML model, we need to clean, format, and pre-process the data.

- **Splitting data** held in a pandas DataFrame can be done with the `train_test_split` function from the `sklearn.model_selection` module (part of scikit-learn, not pandas). This function allows you to split a dataset into training and testing sets.
- **Binning data** involves grouping a set of continuous or numerical data into a smaller number of discrete “bins” or ranges. This can be done using the `cut` or `qcut` functions in pandas, which allow you to specify the number of bins and the labels for each bin.
- **Mean imputation** is a method of replacing missing values with the mean value of the data (typically the mean of the column that contains the gap). This can be done using the `fillna` function in pandas and passing in that mean value.
- **Interpolation** is a method of estimating missing values from the values on either side of the missing data point. This can be done using the `interpolate` function in pandas, which supports various interpolation methods such as linear or polynomial.
- **Combining data** can be done using the `concat` function to concatenate multiple DataFrames along a particular axis, or the `join` function to join two DataFrames on a common column or index.

In [2]:
# Define a dictionary 'x' that maps each column name to its list of values
x = {
    'Name': ['Rose', 'John', 'Jane', 'Mary'],
    'ID': [1, 2, 3, 4],
    'Department': ['Architect Group', 'Software Group', 'Design Team', 'Infrastructure'],
    'Salary': [100000, 80000, 50000, 60000],
}

# Build the DataFrame from the dictionary (one column per key)
df = pd.DataFrame(data=x)

# Display the resulting DataFrame
df

Unnamed: 0,Name,ID,Department,Salary
0,Rose,1,Architect Group,100000
1,John,2,Software Group,80000
2,Jane,3,Design Team,50000
3,Mary,4,Infrastructure,60000


In [3]:
#Retrieving the "ID" column and assigning it to a variable x
x = df[['ID']]
x

Unnamed: 0,ID
0,1
1,2
2,3
3,4


In [4]:
#check the type of x
type(x)

pandas.core.frame.DataFrame

In [5]:
#Retrieving the Department, Salary and ID columns and assigning it to a variable z

z = df[['Department','Salary','ID']]
z

Unnamed: 0,Department,Salary,ID
0,Architect Group,100000,1
1,Software Group,80000,2
2,Design Team,50000,3
3,Infrastructure,60000,4


In [8]:
# Access the value on the first row and the first column

df.iloc[0, 0]

'Rose'

In [9]:
# Access the value on the first row and the third column

df.iloc[0,2]

'Architect Group'

In [10]:
# Access the column using the name

df.loc[0, 'Salary']

100000

In [15]:
# let us do the slicing

df.iloc[0:2, 0:3]

Unnamed: 0,Name,ID,Department
0,Rose,1,Architect Group
1,John,2,Software Group


In [16]:
#let us do the slicing using loc()

df.loc[0:2,'ID':'Department']

Unnamed: 0,ID,Department
0,1,Architect Group
1,2,Software Group
2,3,Design Team


In [11]:
# Re-index the frame by the "Name" column. set_index returns a new DataFrame,
# so df itself keeps its original integer index.
df2 = df.set_index("Name")

# Display the first 5 rows of the new dataframe
df2.head()

Unnamed: 0_level_0,ID,Department,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rose,1,Architect Group,100000
John,2,Software Group,80000
Jane,3,Design Team,50000
Mary,4,Infrastructure,60000


In [12]:
#Now, let us access the column using the name
df2.loc['Jane', 'Salary']

50000

In [17]:
df2.loc['Rose':'Jane', 'ID':'Department']

Unnamed: 0_level_0,ID,Department
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Rose,1,Architect Group
John,2,Software Group
Jane,3,Design Team


Q: Create a dataframe to display the result as below:

|   | Student | Age | Country | Course | Marks |
| --- | --- | --- | --- | --- | --- |
| 0 | David | 27 | UK | Python | 85 |
| 1 | Samuel | 24 | Canada | Data Structures | 72 |
| 2 | Terry | 22 | China | Machine Learning | 89 |
| 3 | Evan | 32 | USA | Web Development | 76 |

In [6]:
# Column name -> list of values. Age and Marks are stored as integers rather
# than strings so that numeric operations (sum, mean, comparisons) work on them.
a = {'Student': ['David', 'Samuel', 'Terry', 'Evan'],
     'Age': [27, 24, 22, 32],
     'Country': ['UK', 'Canada', 'China', 'USA'],
     'Course': ['Python', 'Data Structures', 'Machine Learning', 'Web Development'],
     'Marks': [85, 72, 89, 76]}
df1 = pd.DataFrame(a)
df1

Unnamed: 0,Student,Age,Country,Course,Marks
0,David,27,UK,Python,85
1,Samuel,24,Canada,Data Structures,72
2,Terry,22,China,Machine Learning,89
3,Evan,32,USA,Web Development,76


Q: Retrieve the Marks column and assign it to a variable b


In [7]:
b = df1[['Marks']]
b

Unnamed: 0,Marks
0,85
1,72
2,89
3,76


Q: Use the <code>loc[]</code> indexer to get the Department of Jane in the newly created dataframe df2.

In [13]:
df2.loc['Jane', 'Department']

'Design Team'

Q: Use the <code>iloc[]</code> indexer to get the Salary of Mary in the newly created dataframe df2.

In [14]:
df2.iloc[3,2]

60000

In [27]:
df = pd.read_csv("https://github.com/datalaker/assets/files/10848119/data.csv")
df.head()

Unnamed: 0,name,age,street,city,state,zip,lng,lat
0,Jonathon Myers,41,87101 Ryan Plains Apt. 918,East Tamaraview,Montana,36335,-74.764331,-33.143414
1,Richard Moses,27,255 Walker Trail Apt. 235,Ballhaven,Nevada,11900,-24.597145,71.563941
2,Charles Smith,19,23460 Lauren Forges Suite 938,Hernandezside,Nevada,73620,160.219756,33.808708
3,Tammy Ford,79,848 Avila Neck Suite 892,Robbinsmouth,Vermont,43351,-64.448384,-28.593652
4,Kevin Becker,37,92571 Mackenzie Pine Suite 450,Wilsontown,Kansas,11496,146.620389,71.886168


In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    1000 non-null   object 
 1   age     1000 non-null   int64  
 2   street  1000 non-null   object 
 3   city    1000 non-null   object 
 4   state   1000 non-null   object 
 5   zip     1000 non-null   int64  
 6   lng     1000 non-null   float64
 7   lat     1000 non-null   float64
dtypes: float64(2), int64(2), object(4)
memory usage: 62.6+ KB


In [29]:
df.loc[:,["city","state"]]

Unnamed: 0,city,state
0,East Tamaraview,Montana
1,Ballhaven,Nevada
2,Hernandezside,Nevada
3,Robbinsmouth,Vermont
4,Wilsontown,Kansas
...,...,...
995,Petersonview,New Mexico
996,Lake Katie,Nevada
997,Port Tracyberg,Tennessee
998,West Michael,Virginia


In [30]:
df.loc[1:4,["city","state"]]

Unnamed: 0,city,state
1,Ballhaven,Nevada
2,Hernandezside,Nevada
3,Robbinsmouth,Vermont
4,Wilsontown,Kansas


In [31]:
df.iloc[1:4,[3,4]]

Unnamed: 0,city,state
1,Ballhaven,Nevada
2,Hernandezside,Nevada
3,Robbinsmouth,Vermont


In [34]:
df['latlong']= df['lat']*df['lng']
df

Unnamed: 0,name,age,street,city,state,zip,lng,lat,latlong
0,Jonathon Myers,41,87101 Ryan Plains Apt. 918,East Tamaraview,Montana,36335,-74.764331,-33.143414,2477.945137
1,Richard Moses,27,255 Walker Trail Apt. 235,Ballhaven,Nevada,11900,-24.597145,71.563941,-1760.268634
2,Charles Smith,19,23460 Lauren Forges Suite 938,Hernandezside,Nevada,73620,160.219756,33.808708,5416.822946
3,Tammy Ford,79,848 Avila Neck Suite 892,Robbinsmouth,Vermont,43351,-64.448384,-28.593652,1842.814664
4,Kevin Becker,37,92571 Mackenzie Pine Suite 450,Wilsontown,Kansas,11496,146.620389,71.886168,10539.977916
...,...,...,...,...,...,...,...,...,...
995,Jennifer Mckay,65,274 Stephanie Falls Suite 617,Petersonview,New Mexico,35358,-9.854228,86.445643,-851.855076
996,Carmen Wright,76,3703 Hall Mews Apt. 531,Lake Katie,Nevada,55459,68.936281,-51.308090,-3536.988944
997,Dylan Stout,39,940 Hubbard Lodge Apt. 256,Port Tracyberg,Tennessee,73888,52.241034,-7.998483,-417.849022
998,Debbie Moody,27,7446 Jonathan Lakes Apt. 229,West Michael,Virginia,79397,-113.632353,49.893214,-5669.483249


In [35]:
# Half the latitude plus the longitude, computed with vectorised column
# arithmetic instead of calling a Python lambda once per row.
df['ll'] = df['lat'] / 2 + df['lng']
df

Unnamed: 0,name,age,street,city,state,zip,lng,lat,latlong,ll
0,Jonathon Myers,41,87101 Ryan Plains Apt. 918,East Tamaraview,Montana,36335,-74.764331,-33.143414,2477.945137,-91.336038
1,Richard Moses,27,255 Walker Trail Apt. 235,Ballhaven,Nevada,11900,-24.597145,71.563941,-1760.268634,11.184825
2,Charles Smith,19,23460 Lauren Forges Suite 938,Hernandezside,Nevada,73620,160.219756,33.808708,5416.822946,177.124110
3,Tammy Ford,79,848 Avila Neck Suite 892,Robbinsmouth,Vermont,43351,-64.448384,-28.593652,1842.814664,-78.745210
4,Kevin Becker,37,92571 Mackenzie Pine Suite 450,Wilsontown,Kansas,11496,146.620389,71.886168,10539.977916,182.563473
...,...,...,...,...,...,...,...,...,...,...
995,Jennifer Mckay,65,274 Stephanie Falls Suite 617,Petersonview,New Mexico,35358,-9.854228,86.445643,-851.855076,33.368594
996,Carmen Wright,76,3703 Hall Mews Apt. 531,Lake Katie,Nevada,55459,68.936281,-51.308090,-3536.988944,43.282236
997,Dylan Stout,39,940 Hubbard Lodge Apt. 256,Port Tracyberg,Tennessee,73888,52.241034,-7.998483,-417.849022,48.241793
998,Debbie Moody,27,7446 Jonathan Lakes Apt. 229,West Michael,Virginia,79397,-113.632353,49.893214,-5669.483249,-88.685746


In [32]:
# Rows at positions 1-3, columns at positions 3 and 4.
df1 = df.iloc[1:4, [3, 4]]
# index=False leaves the row index out of the file; the output is gzip-compressed.
df1.to_csv("../data/location.csv.gz", index=False, compression="gzip")

In [20]:
csv_path = "../data/TopSellingAlbums.csv"

df = pd.read_csv(csv_path)
df

Unnamed: 0,Artist,Album,Released,Length,Genre,Music Recording Sales (millions),Claimed Sales (millions),Released.1,Soundtrack,Rating
0,Michael Jackson,Thriller,1982,0:42:19,"pop, rock, R&B",46.0,65,30-Nov-82,,10.0
1,AC/DC,Back in Black,1980,0:42:11,hard rock,26.1,50,25-Jul-80,,9.5
2,Pink Floyd,The Dark Side of the Moon,1973,0:42:49,progressive rock,24.2,45,01-Mar-73,,9.0
3,Whitney Houston,The Bodyguard,1992,0:57:44,"R&B, soul, pop",27.4,44,17-Nov-92,Y,8.5
4,Meat Loaf,Bat Out of Hell,1977,0:46:33,"hard rock, progressive rock",20.6,43,21-Oct-77,,8.0
5,Eagles,Their Greatest Hits (1971-1975),1976,0:43:08,"rock, soft rock, folk rock",32.2,42,17-Feb-76,,7.5
6,Bee Gees,Saturday Night Fever,1977,1:15:54,disco,20.6,40,15-Nov-77,Y,7.0
7,Fleetwood Mac,Rumours,1977,0:40:01,soft rock,27.9,40,04-Feb-77,,6.5


In [22]:
xlsx_path = "../data/TopSellingAlbums.xlsx"

In [23]:
df = pd.read_excel(xlsx_path)
df.head()

Unnamed: 0,Artist,Album,Released,Length,Genre,Music Recording Sales (millions),Claimed Sales (millions),Released.1,Soundtrack,Rating
0,Michael Jackson,Thriller,1982,00:42:19,"pop, rock, R&B",46.0,65,1982-11-30,,10.0
1,AC/DC,Back in Black,1980,00:42:11,hard rock,26.1,50,1980-07-25,,9.5
2,Pink Floyd,The Dark Side of the Moon,1973,00:42:49,progressive rock,24.2,45,1973-03-01,,9.0
3,Whitney Houston,The Bodyguard,1992,00:57:44,"R&B, soul, pop",27.4,44,1992-11-17,Y,8.5
4,Meat Loaf,Bat Out of Hell,1977,00:46:33,"hard rock, progressive rock",20.6,43,1977-10-21,,8.0


In [24]:
# Access to the column Length

x = df[['Length']]
x

Unnamed: 0,Length
0,00:42:19
1,00:42:11
2,00:42:49
3,00:57:44
4,00:46:33
5,00:43:08
6,01:15:54
7,00:40:01


In [25]:
# Slicing the dataframe

df.iloc[0:2, 0:3]

Unnamed: 0,Artist,Album,Released
0,Michael Jackson,Thriller,1982
1,AC/DC,Back in Black,1980


### Titanic data wrangling

In [38]:
import pandas as pd

# NOTE(review): the other cells in this notebook read from "../data/...";
# this one uses "./data/..." - confirm which location holds titanic.csv.
path = './data/titanic.csv'
# Load the CSV into the DataFrame used by the cells that follow.
dataframe = pd.read_csv(path)
# Show the first row (position 0).
dataframe.iloc[0]

Name        Allen, Miss Elisabeth Walton
PClass                               1st
Age                                 29.0
Sex                               female
Survived                               1
SexCode                                1
Name: 0, dtype: object

In [2]:
# Filter rows.
# Conditionally selecting and filtering data is one of the most common tasks in
# data wrangling. You rarely want all the raw data from the source; instead, you
# are interested in only some subsection of it. For example, you might only be
# interested in stores in certain states or the records of patients over a
# certain age.
dataframe[(dataframe['Sex'] == 'female') & (dataframe['Age'] >= 65)]

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
73,"Crosby, Mrs Edward Gifford (Catherine Elizabet...",1st,69.0,female,1,1


In [3]:
# Sort by Age.
# During data analysis and exploration, it's often useful to sort a DataFrame
# by a particular column or set of columns. The `by` argument to sort_values
# takes a list of columns by which to sort the DataFrame, and will sort based
# on the order of column names in the list.
#
# By default, the `ascending` argument is set to True, so it will sort the
# values lowest to highest. If we wanted the oldest passengers instead of the
# youngest, we could set it to False.
dataframe.sort_values(by=["Age"]).head(2)

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
763,"Dean, Miss Elizabeth Gladys (Millvena)",3rd,0.17,female,1,1
751,"Danbom, Master Gilbert Sigvard Emanuel",3rd,0.33,male,0,0


In [4]:
# Replace values.
# replace is a simple tool for replacing values that also has the powerful
# ability to accept regular expressions.
display(dataframe['Sex'].replace("female", "Woman").head(5))

# replace "female" and "male" with "Woman" and "Man"
display(dataframe['Sex'].replace(["female", "male"], ["Woman", "Man"]).head(5))

# replace every occurrence of the value 1, in any column
display(dataframe.replace(1, "One").head(5))

# replace also accepts regular expressions
display(dataframe.replace(r"1st", "First", regex=True).head(5))

0    Woman
1    Woman
2     male
3    Woman
4     male
Name: Sex, dtype: object

0    Woman
1    Woman
2      Man
3    Woman
4      Man
Name: Sex, dtype: object

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,One,One
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,One
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",1st,25.0,female,0,One
4,"Allison, Master Hudson Trevor",1st,0.92,male,One,0


Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",First,29.0,female,1,1
1,"Allison, Miss Helen Loraine",First,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",First,30.0,male,0,0
3,"Allison, Mrs Hudson JC (Bessie Waldo Daniels)",First,25.0,female,0,1
4,"Allison, Master Hudson Trevor",First,0.92,male,1,0


In [5]:
#rename column, show two rows
dataframe.rename(columns={'PClass': 'Passenger Class', 'Sex': 'Gender'}).head(2)

Unnamed: 0,Name,Passenger Class,Age,Gender,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1


In [6]:
# Calculate statistics.
# In addition to the statistics used in the solution, pandas offers variance
# (var), standard deviation (std), kurtosis (kurt), skewness (skew), standard
# error of the mean (sem), mode (mode), median (median), value counts, and a
# number of others.
print('Maximum:', dataframe['Age'].max())
print('Minimum:', dataframe['Age'].min())
print('Mean:', dataframe['Age'].mean())
print('Sum:', dataframe['Age'].sum())
print('Count:', dataframe['Age'].count())

Maximum: 71.0
Minimum: 0.17
Mean: 30.397989417989418
Sum: 22980.88
Count: 756


In [7]:
#select unique values
dataframe['Sex'].unique()

array(['female', 'male'], dtype=object)

In [8]:
#show counts
dataframe['Sex'].value_counts()

male      851
female    462
Name: Sex, dtype: int64

In [9]:
#show number of unique values
dataframe['PClass'].nunique()

4

In [10]:
#select missing values, show two rows
dataframe[dataframe['Age'].isnull()].head(2)

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
12,"Aubert, Mrs Leontine Pauline",1st,,female,1,1
13,"Barkworth, Mr Algernon H",1st,,male,1,0


In [15]:
#replace values with NaN
import numpy as np
display(dataframe['Sex'].head(5))
display(dataframe['Sex'].replace('male', np.nan).head(5))

0    female
1    female
2      male
3    female
4      male
Name: Sex, dtype: object

0    female
1    female
2       NaN
3    female
4       NaN
Name: Sex, dtype: object

In [24]:
#load data, set missing values
pd.read_csv(path, na_values=[np.nan, 'NONE', -999]).sample(5, random_state=42)

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
51,"Case, Mr Howard Brown",1st,49.0,male,0,0
405,"Fillbrook, Mr Charles",2nd,,male,0,0
721,"Coelho, Mr Domingos Fernandes",3rd,20.0,male,0,0
485,"Louch, Mr Charles Alexander",2nd,48.0,male,0,0
1177,"Sage, Mrs John",3rd,,female,0,1


In [28]:
# delete single column
display(dataframe.drop('Age', axis=1).head(2))

# delete multiple columns
display(dataframe.drop(['Age', 'Sex'], axis=1).head(2))

# If a column does not have a name (which can sometimes happen), you can drop
# it by its column index, using dataframe.columns to look it up.
display(dataframe.drop(dataframe.columns[1], axis=1).head(2))

Unnamed: 0,Name,PClass,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,female,1,1
1,"Allison, Miss Helen Loraine",1st,female,0,1


Unnamed: 0,Name,PClass,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,1,1
1,"Allison, Miss Helen Loraine",1st,0,1


Unnamed: 0,Name,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",29.0,female,1,1
1,"Allison, Miss Helen Loraine",2.0,female,0,1


In [30]:
#delete rows, show first two rows of output
display(dataframe[dataframe['Sex'] != 'male'].head(2))

#we can even use it to delete a single row by row index
display(dataframe[dataframe.index != 0].head(2))

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1


Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0


In [31]:
#drop duplicates, show first two rows of output
display(dataframe.drop_duplicates().head(2))

display(dataframe.drop_duplicates(subset=['Sex']))

Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
1,"Allison, Miss Helen Loraine",1st,2.0,female,0,1


Unnamed: 0,Name,PClass,Age,Sex,Survived,SexCode
0,"Allen, Miss Elisabeth Walton",1st,29.0,female,1,1
2,"Allison, Mr Hudson Joshua Creighton",1st,30.0,male,0,0


In [33]:
# Group rows by the values of the column 'Sex', calculate mean of each group.
# groupby is where data wrangling really starts to take shape. It is very
# common to have a DataFrame where each row is a person or an event and we want
# to group them according to some criterion and then calculate a statistic. For
# example, you can imagine a DataFrame where each row is an individual sale at
# a national restaurant chain and we want the total sales per restaurant. We
# can accomplish this by grouping rows by individual restaurants and then
# calculating the sum of each group.
dataframe.groupby('Sex').mean(numeric_only=True)

Unnamed: 0_level_0,Age,Survived,SexCode
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,29.396424,0.666667,1.0
male,31.014338,0.166863,0.0


In [39]:
#get the minimum of every column
dataframe.agg("min")

Name        Abbing, Mr Anthony
PClass                       *
Age                       0.17
Sex                     female
Survived                     0
SexCode                      0
dtype: object

In [40]:
#mean Age, min and max SexCode
dataframe.agg({"Age":["mean"], "SexCode":["min", "max"]})

Unnamed: 0,Age,SexCode
mean,30.397989,
min,,0.0
max,,1.0


In [41]:
#print first two names uppercased
for name in dataframe['Name'][0:2]:
    print(name.upper())

ALLEN, MISS ELISABETH WALTON
ALLISON, MISS HELEN LORAINE


In [42]:
#create function
def uppercase(x):
    """Return ``x`` converted to upper case."""
    result = x.upper()
    return result

# Apply function, show two rows.
# apply is a great way to do data cleaning and wrangling. It is common to write
# a function to perform some useful operation (separate first and last names,
# convert strings to floats, etc.) and then map that function to every element
# in a column.
dataframe['Name'].apply(uppercase)[0:2]

0    ALLEN, MISS ELISABETH WALTON
1     ALLISON, MISS HELEN LORAINE
Name: Name, dtype: object

In [43]:
# Group rows, apply function to groups.
# By combining groupby and apply we can calculate custom statistics or apply
# any function to each group separately.
dataframe.groupby('Sex').apply(lambda x: x.count())

Unnamed: 0_level_0,Name,PClass,Age,Sex,Survived,SexCode
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
female,462,462,288,462,462,462
male,851,851,468,851,851,851


### Grouping Rows by Time

In [34]:
# Load libraries
import pandas as pd
import numpy as np

# Seeded generator so the random sales figures are identical on every run
rng = np.random.default_rng(42)

# Create date range: 100,000 timestamps spaced 30 seconds apart.
# Lower-case 's' is the seconds alias; upper-case 'S' is deprecated in recent pandas.
time_index = pd.date_range('06/06/2017', periods=100000, freq='30s')

# Create DataFrame
dataframe = pd.DataFrame(index=time_index)

# Create column of random values: integers from 1 to 9 inclusive
dataframe['Sale_Amount'] = rng.integers(1, 10, 100000)

# Group rows by week, calculate sum per week
dataframe.resample('W').sum()

Unnamed: 0,Sale_Amount
2017-06-11,86158
2017-06-18,100501
2017-06-25,100508
2017-07-02,100823
2017-07-09,101008
2017-07-16,10573


In [35]:
# Group by two weeks, calculate mean
dataframe.resample('2W').mean()

Unnamed: 0,Sale_Amount
2017-06-11,4.985995
2017-06-25,4.985342
2017-07-09,5.005729
2017-07-23,5.083173


In [36]:
# Group by month, count rows
dataframe.resample('M').count()

Unnamed: 0,Sale_Amount
2017-06-30,72000
2017-07-31,28000


In [37]:
# Group by month, count rows; label='left' names each bin by its left edge
dataframe.resample('M', label='left').count()

Unnamed: 0,Sale_Amount
2017-05-31,72000
2017-06-30,28000


### Concatenating DataFrames

Concatenating is not a word you hear much outside of computer science and programming, so if you have not heard it before, do not worry. The informal definition of concatenate is to glue two objects together. In the solution we glued together two small DataFrames using the axis parameter to indicate whether we wanted to stack the two DataFrames on top of each other or place them side by side.

In [44]:
# Load library
import pandas as pd

# First DataFrame: records with ids 1-3
data_a = {
    'id': ['1', '2', '3'],
    'first': ['Alex', 'Amy', 'Allen'],
    'last': ['Anderson', 'Ackerman', 'Ali'],
}
dataframe_a = pd.DataFrame(data_a, columns=list(data_a))

# Second DataFrame: records with ids 4-6, same columns
data_b = {
    'id': ['4', '5', '6'],
    'first': ['Billy', 'Brian', 'Bran'],
    'last': ['Bonder', 'Black', 'Balwner'],
}
dataframe_b = pd.DataFrame(data_b, columns=list(data_b))

# Concatenate DataFrames by rows (axis=0 stacks them on top of each other)
frames = [dataframe_a, dataframe_b]
pd.concat(frames, axis=0)

Unnamed: 0,id,first,last
0,1,Alex,Anderson
1,2,Amy,Ackerman
2,3,Allen,Ali
0,4,Billy,Bonder
1,5,Brian,Black
2,6,Bran,Balwner


In [45]:
# Concatenate DataFrames by columns
pd.concat([dataframe_a, dataframe_b], axis=1)

Unnamed: 0,id,first,last,id.1,first.1,last.1
0,1,Alex,Anderson,4,Billy,Bonder
1,2,Amy,Ackerman,5,Brian,Black
2,3,Allen,Ali,6,Bran,Balwner


### Merging DataFrames

In [46]:
# Load library
import pandas as pd

# Employees table: one row per employee
employee_data = {
    'employee_id': ['1', '2', '3', '4'],
    'name': ['Amy Jones', 'Allen Keys', 'Alice Bees', 'Tim Horton'],
}
dataframe_employees = pd.DataFrame(employee_data, columns=list(employee_data))

# Sales table: total sales keyed by employee id
sales_data = {
    'employee_id': ['3', '4', '5', '6'],
    'total_sales': [23456, 2512, 2345, 1455],
}
dataframe_sales = pd.DataFrame(sales_data, columns=list(sales_data))

# Merge DataFrames on the shared 'employee_id' column
dataframe_employees.merge(dataframe_sales, on='employee_id')

Unnamed: 0,employee_id,name,total_sales
0,3,Alice Bees,23456
1,4,Tim Horton,2512


merge defaults to inner joins. If we want to do an outer join, we can specify that with the how parameter:

In [47]:
# Merge DataFrames
pd.merge(dataframe_employees, dataframe_sales, on='employee_id', how='outer')

Unnamed: 0,employee_id,name,total_sales
0,1,Amy Jones,
1,2,Allen Keys,
2,3,Alice Bees,23456.0
3,4,Tim Horton,2512.0
4,5,,2345.0
5,6,,1455.0


The same parameter can be used to specify left and right joins:

In [48]:
# Merge DataFrames
pd.merge(dataframe_employees, dataframe_sales, on='employee_id', how='left')

Unnamed: 0,employee_id,name,total_sales
0,1,Amy Jones,
1,2,Allen Keys,
2,3,Alice Bees,23456.0
3,4,Tim Horton,2512.0


We can also specify the column name in each DataFrame to merge on:

In [49]:
# Merge DataFrames
pd.merge(dataframe_employees,
         dataframe_sales,
         left_on='employee_id',
         right_on='employee_id')

Unnamed: 0,employee_id,name,total_sales
0,3,Alice Bees,23456
1,4,Tim Horton,2512


### Quiz

Q1. How do you load a CSV file into a Pandas DataFrame?

```
# Using read_csv() function

import pandas as pd
df = pd.read_csv('sample.csv')
```

Q2. How do you check the data type of a column in a Pandas DataFrame?

```
# Using the dtype attribute of a column (and dtypes for the whole DataFrame)

import pandas as pd
my_dict = {"name": ['Vivek', 'Dave', 'Tom'], "age": [23, 40, 35]}
df = pd.DataFrame(my_dict)
print(df['age'].dtype)  # data type of a single column
print(df.dtypes)        # data type of every column
```

Q3. How do you select rows from a Pandas DataFrame based on a condition?

```
import pandas as pd
my_dict = {"name": ['Vivek', 'Dave', 'Tom'], "age": [23, 40, 35]}
df = pd.DataFrame(my_dict)

# Selecting rows with age greater than 30
print(df[df['age'] > 30])
```

Q4. How do you rename columns in a Pandas DataFrame?

```
import pandas as pd
my_dict = {"name": ['Vivek', 'Dave', 'Tom'], "age": [23, 40, 35]}
df = pd.DataFrame(my_dict)
# rename returns a new DataFrame; reassign the result instead of using inplace=True
df = df.rename(columns={'name': 'first name'})
print(df)
```

Q5. How do you drop columns in a Pandas DataFrame?

```
import pandas as pd
my_dict = {"name": ['Vivek', 'Dave', 'Tom'], "age": [23, 40, 35]}
df = pd.DataFrame(my_dict)

# Using drop (returns a new DataFrame and leaves df unchanged)
print(df.drop(columns=['age']))

# Using del (removes the column from df in place)
del df['age']
print(df)

df = pd.DataFrame(my_dict)

# Using pop (removes the column from df in place and returns it)
df.pop('age')
print(df)
```

Q6. How do you find the unique values in a column of a Pandas DataFrame?

```
import pandas as pd
my_dict = {"a": [1, 2, 3, 2, 1, 5], "b": [23, 40, 35, 40, 23, 26]}
df = pd.DataFrame(my_dict)
print(df['a'].unique())
```

Q7. How do you find the number of missing values in each column of a Pandas DataFrame?

```
import pandas as pd
my_dict = {"a": [1, 2, None, 2, 1, 5], "b": [None, 40, 35, None, 23, 26]}
df = pd.DataFrame(my_dict)
print(df.isnull().sum())
```

Q8. How do you fill missing values in a Pandas DataFrame with a specific value?

```
import pandas as pd
my_dict = {"a": [1, 2, None, 2, 1, 5], "b": [None, 40, 35, None, 23, 26]}
df = pd.DataFrame(my_dict)
# fillna returns a new DataFrame; reassign the result instead of using inplace=True
df = df.fillna(999)
print(df)
```

Q9. How do you concatenate two Pandas DataFrames?

```
import pandas as pd
df1 = pd.DataFrame(data=[[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
df2 = pd.DataFrame(data=[[13, 14, 15, 16], [17, 18, 19, 20], [21, 22, 23, 24]])
df3 = pd.concat([df1, df2])
print(df3)
```

Q10. How do you merge two Pandas DataFrames on a specific column?

```
import pandas as pd
df1 = pd.DataFrame(data={'City': ['New York', 'Chicago', 'Los Angeles'], 'Temperature': [65, 70, 75]})
df2 = pd.DataFrame(data={'City': ['New York', 'Chicago', 'Los Angeles'], 'Humidity': [60, 50, 55]})
df3 = pd.merge(df1, df2, on = 'City')
print(df3)
```

Q11. How do you group data in a Pandas DataFrame by a specific column and apply an aggregation function?

```
import pandas as pd
df = pd.DataFrame({'Animal': ['Dog', 'Cat', 'Dog', 'Fish', 'Fish', 'Fish'],
                   'Age': [2, 3, 2, 1, 2, 3]})

print(df.groupby('Animal').sum())
print(df.groupby('Animal').max())
```

Q12. How do you pivot a Pandas DataFrame?

```
import pandas as pd
df = pd.DataFrame({'Animal': ['Dog', 'Cat', 'Dog', 'Fish', 'Fish', 'Fish'],
                   'Size': ['Small', 'Small', 'Medium', 'Small', 'Medium', 'Large'],
                   'Age': [2, 3, 2, 1, 2, 3]})

print(df.pivot(index = 'Animal', columns = ['Size']))
```

Q13. How do you change the data type of a column in a Pandas DataFrame?

```
import pandas as pd
df = pd.DataFrame({'Animal': ['Dog', 'Cat', 'Dog', 'Fish', 'Fish', 'Fish'],
                   'Size': ['Small', 'Small', 'Medium', 'Small', 'Medium', 'Large'],
                   'Age': [2, 3, 2, 1, 2, 3]})

df['Age'] = df['Age'].astype(float)
print(df.dtypes)
```

Q14. How do you sort a Pandas DataFrame by a specific column?

```
import pandas as pd
df = pd.DataFrame({'Animal': ['Dog', 'Cat', 'Dog', 'Fish', 'Fish', 'Fish'],
                   'Size': ['Small', 'Small', 'Medium', 'Small', 'Medium', 'Large'],
                   'Age': [2, 3, 2, 1, 2, 3]})

# sort_values returns a new, sorted DataFrame (ascending by default);
# reassign the result instead of using inplace=True
df = df.sort_values(by=['Age'])
print(df)
```

Q15. How do you create a copy of a Pandas DataFrame?

```
import pandas as pd
df = pd.DataFrame({'Animal': ['Dog', 'Cat', 'Dog', 'Fish', 'Fish', 'Fish'],
                   'Size': ['Small', 'Small', 'Medium', 'Small', 'Medium', 'Large'],
                   'Age': [2, 3, 2, 1, 2, 3]})

df2 = df.copy()
print(df2)
```

Q16. How do you filter rows of a Pandas DataFrame by multiple conditions?

```
import pandas as pd
df = pd.DataFrame({'Name': ['John', 'Jane', 'Bob', 'Alice', 'Mike', 'Carol', 'Steve', 'Kate', 'Adam', 'Tom'],
                   'Age': [35, 25, 55, 46, 32, 35, 30, 30, 26, 27],
                   'Gender': ['M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'M']})

new_df = df.loc[(df['Age'] > 30) & (df['Gender'] == 'M')]
print(new_df)
```

Q17. How do you calculate the mean of a column in a Pandas DataFrame?

```
import pandas as pd
df = pd.DataFrame({'Name': ['John', 'Jane', 'Bob', 'Alice', 'Mike', 'Carol', 'Steve', 'Kate', 'Adam', 'Tom'],
                   'Age': [35, 25, 55, 46, 32, 35, 30, 30, 26, 27],
                   'Gender': ['M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'M']})

print(df['Age'].mean())
```

Q18. How do you calculate the standard deviation of a column in a Pandas DataFrame?

```
import pandas as pd
df = pd.DataFrame({'Name': ['John', 'Jane', 'Bob', 'Alice', 'Mike', 'Carol', 'Steve', 'Kate', 'Adam', 'Tom'],
                   'Age': [35, 25, 55, 46, 32, 35, 30, 30, 26, 27],
                   'Gender': ['M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'M']})

print(df['Age'].std())
```

Q19. How do you calculate the correlation between two columns in a Pandas DataFrame?

```
import pandas as pd
df = pd.DataFrame({'X': [1, 2, 3, 4, 5],
                   'Y': [2, 4, 6, 8, 10]})

corr = df['X'].corr(df['Y'])
print(corr)
```

Q20. How do you select specific columns in a DataFrame using their labels?

```
import pandas as pd
df = pd.DataFrame({'Name': ['John', 'Jane', 'Bob', 'Alice', 'Mike', 'Carol', 'Steve', 'Kate', 'Adam', 'Tom'],
                   'Age': [35, 25, 55, 46, 32, 35, 30, 30, 26, 27],
                   'Gender': ['M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'M']})

print(df.loc[:, ['Name', 'Gender']])
```

Q21. How do you select specific rows in a DataFrame using their indexes?

```
import pandas as pd
df = pd.DataFrame({'Name': ['John', 'Jane', 'Bob', 'Alice', 'Mike', 'Carol', 'Steve', 'Kate', 'Adam', 'Tom'],
                   'Age': [35, 25, 55, 46, 32, 35, 30, 30, 26, 27],
                   'Gender': ['M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'M']})

print(df.iloc[3:9])
```

Q22. How do you sort a DataFrame by a specific column?

```
import pandas as pd
df = pd.DataFrame({'Animal': ['Dog', 'Cat', 'Dog', 'Fish', 'Fish', 'Fish'],
                   'Size': ['Small', 'Small', 'Medium', 'Small', 'Medium', 'Large'],
                   'Age': [2, 3, 2, 1, 2, 3]})

# sort_values returns a new, sorted DataFrame (ascending by default);
# reassign the result instead of using inplace=True
df = df.sort_values(by=['Age'])
print(df)
```

Q23. How do you create a new column in a DataFrame based on the values of another column?

```
import pandas as pd
df = pd.DataFrame({'Age': [35, 25, 10, 46, 32, 12, 30, 30, 5, 27],
                   'Gender': ['M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'M']})
df['Eligible'] = df['Age'] > 18
print(df)
```

Q24. How do you remove duplicates from a DataFrame?

```
import pandas as pd

ids = [1, 2, 2, 3, 3, 4, 4, 5]
names = ['John', 'Jane', 'Jane', 'Bob', 'Bob', 'Alice', 'Alice', 'Mike']
df = pd.DataFrame({'ID': ids, 'Name': names})

# A row is a duplicate when all of its column values repeat an earlier row;
# the first occurrence is kept and the original index labels are preserved.
df = df.drop_duplicates(keep='first')
print(df)
```

Q25. What is the difference between .loc and .iloc in Pandas?

- ***.loc[]*** is used to select rows and columns by their labels
- ***.iloc[]*** is used to select rows and columns by their position. It uses an integer index.

## Assignment

Q1. Why do we call Python as a general purpose and high-level programming language?

> Python is called a general-purpose language because it is not limited to a specific application or use case; it is used in a variety of areas such as Web Development, Data Analytics, Data Science, Big Data, Application Development, etc. Python is called a high-level language because it hides machine-level details such as memory management, and its syntax is easy to read and closely resembles the English language.

Q2. Why is Python called a dynamically typed language?

> Unlike statically typed languages, Python does not require us to declare the datatype of a variable. The type is determined at runtime from the value stored in the variable, and the same variable can later be assigned a value of a different type.

Q3. List some pros and cons of Python programming language?

> **Pros:**
>
> 1. Easy to learn
> 2. Easy to develop code/application
> 3. Wide variety of libraries
>
> **Cons:**
>
> 1. Low performance
> 2. High memory requirement
> 3. Difficult to optimize

Q4. In what all domains can we use Python?

> Python can be used in Data Science, Data Engineering, Data Analytics, Web Development, Application development, etc.

Q5. What are variable and how can we declare them?

> A variable is a name that refers to a value stored in memory. We declare a variable simply by assigning a value to a name.
> e.g. a = 10

Q6. How can we take an input from the user in Python?

> We can use input function in Python to take user input.
> e.g name = input("Enter your name: ")

Q7. What is the default datatype of the value that has been taken as an input using input() function?

> The default datatype of the value from input function is string.

Q8. What is type casting?

> Type casting is converting a value from one datatype to another, e.g. int("10") converts the string "10" to the integer 10.

Q9. Can we take more than one input from the user using single input() function? If yes, how? If no, why?

> We can use split() function to take multiple inputs from user.
> We can also use input function inside a loop to take input in each iteration of loop.

Q10. What are keywords?

> Keywords are predefined words in Python. We cannot use these words to name any variable, function, class, etc in our code.

Q11. Can we use keywords as a variable? Support your answer with reason.

> We cannot use a keyword as a variable name. Keywords are reserved by the language for a predefined purpose, so trying to assign to one (e.g. for = 5) raises a SyntaxError.

Q12. What is indentation? What's the use of indentation in Python?

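> Indentation is the whitespace at the beginning of a line. Python uses it to identify a section/block of code, so every statement in a block must be indented by the same amount; 4 spaces per level is the convention. It is used in functions, classes, loops, decision statements, etc.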

Q13. How can we throw some output in Python?

> We can throw output in python using print() function.
> e.g. print("Hello World!!")

Q14. What are operators in Python?

> Operators in Python are used to perform Arithmetic, Logical, Relational, etc operations on the variables.

Q15. What is difference between / and // operators?

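> / -> True division. The output of this operator is always of float datatype, e.g. 7 / 2 -> 3.5
>
> // -> Floor division. The output is rounded down to the nearest whole number; it is of integer datatype when both operands are integers and float otherwise, e.g. 7 // 2 -> 3, 7.0 // 2 -> 3.0, -7 // 2 -> -4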

Q16. Write a code that gives following as an output.

```
iNeuroniNeuroniNeuroniNeuron
```

    print('iNeuron' * 4)

Q17. Write a code to take a number as an input from the user and check if the number is odd or even.

    num = int(input("Enter a number: "))
    # A non-zero remainder after dividing by 2 means the number is odd.
    if num % 2 != 0:
        print("Odd")
    else:
        print("Even")

Q18. What are boolean operator?

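> Boolean operators (and, or, not) are used to combine or negate conditions. `and` and `or` return one of their operands (so 1 or 0 gives 1), while `not` always returns True or False.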

Q19. What will the output of the following?

```
1 or 0
0 and 0
True and False and True
1 or 0 or 0
```

    1 or 0 -> 1
	0 and 0 -> 0
	True and False and True -> False
	1 or 0 or 0 -> 1

Q20. What are conditional statements in Python?

> Conditional statements execute the block of code contained inside it only if the specified condition is satisfied. They are used to control the flow of code.

Q21. What is use of 'if', 'elif' and 'else' keywords?

> if: Used to check the first condition.
>
> elif: Used to check a further condition; it is evaluated only when all the conditions before it in the same if-elif chain were False.
>
> else: It is executed when none of the conditions in if & elif are satisfied.

Q22. Write a code to take the age of person as an input and if age >= 18 display "I can vote". If age is < 18 display "I can't vote".

    age = int(input("Enter age: "))
    # Pick the message first, then print once.
    message = "I can vote" if age >= 18 else "I can't vote"
    print(message)

Q23. Write a code that displays the sum of all the even numbers from the given list.

```
numbers = [12, 75, 150, 180, 145, 525, 50]
```

    numbers = [12, 75, 150, 180, 145, 525, 50]

    # Named `total` so that the built-in sum() function is not shadowed.
    total = 0
    for num in numbers:
        if num % 2 == 0:
            total += num
    print(total)
    # Output -> 392

Q24. Write a code to take 3 numbers as an input from the user and display the greatest no as output.

    n1 = int(input("Enter 1st number: "))
    n2 = int(input("Enter 2nd number: "))
    n3 = int(input("Enter 3rd number: "))
    # max() handles ties correctly. A chain of strict ">" comparisons does not:
    # for 5, 5, 3 neither n1 > n2 nor n2 > n1 holds, so it would print 3.
    print(max(n1, n2, n3))

Q25. Write a program to display only those numbers from a list that satisfy the following conditions

- The number must be divisible by five
- If the number is greater than 150, then skip it and move to the next number
- If the number is greater than 500, then stop the loop

```
numbers = [12, 75, 150, 180, 145, 525, 50]
```

    for num in numbers:
        # Stop the loop completely at the first number above 500.
        if num > 500:
            break
        # Skip numbers above 150 and numbers not divisible by five.
        if num > 150 or num % 5 != 0:
            continue
        print(num)

Q26. What is a string? How can we declare string in Python?

> String datatype in Python is used to store character or text data. We can declare string in Python using:

- Single quotes: 'Big Data'
- Double quotes: "Big Data"
- Triple quotes: ''' Big Data'''

Q27. How can we access the string using its index?

> We can access string or its part by providing the index no. or range of index nos.

    sample_str = 'Big Data Engineer'
    sample_str[0:3] -> 'Big'
    sample_str[ : : -1] -> 'reenignE ataD giB'

Q28. Write a code to get the desired output of the following

```
string = "Big Data iNeuron"
desired_output = "iNeuron"
```

    print(string[9:])

Q29. Write a code to get the desired output of the following

```
string = "Big Data iNeuron"
desired_output = "norueNi"
```

    print(string[-1:-8:-1])

Q30. Reverse the string given in the above question.

    print(string[::-1])

Q31. How can you delete entire string at once?

    del string

Q32. What is escape sequence?

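> An escape sequence is a backslash ( \ ) followed by a character. It is used to treat a special character (such as a quote) as a part of the string to avoid errors, or to insert characters such as a newline (\n) or a tab (\t).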
> e.g. string = "I opted for \"Big Data Engineering\" course"

Q33. How can you print the below string?

```
'iNeuron's Big Data Course'
```

    print("'iNeuron's Big Data Course'")
    # or
    print('\'iNeuron\'s Big Data Course\'')

Q34. What is a list in Python?

> List in python is a sequential datatype. It is used to store different types of data in a sequential manner.

Q35. How can you create a list in Python?

    sample_list = [1, 'Hi', True, 4.5]

Q36. How can we access the elements in a list?

> We can access the elements of list using index.

    sample_list = [1, 'Hi', True, 4.5]
    sample_list[0]    # -> 1
    sample_list[1]    # -> 'Hi'
    sample_list[-1]   # -> 4.5 (negative indexes count from the end)

Q37. Write a code to access the word "iNeuron" from the given list.

```
lst = [1,2,3,"Hi",[45,54, "iNeuron"], "Big Data"]
```

    print(lst[4][2])

Q38. Take a list as an input from the user and find the length of the list.

    # Method 1: split a comma-separated line of text (every item stays a string)
    input_list = input('Enter , separated list items:').split(',')
    print(input_list)
    print('Length of the list is:', len(input_list))

    # Method 2: parse a Python list literal such as [1, 'a', 2.5].
    # ast.literal_eval only accepts literals; eval() would execute any
    # code the user types, so it must not be used on user input.
    import ast
    input_list = ast.literal_eval(input('Enter the list with []: '))
    print(input_list)
    print('Length of the list is:', len(input_list))

Q39. Add the word "Big" in the 3rd index of the given list.

```
lst = ["Welcome", "to", "Data", "course"]
```

    # Reading "3rd index" literally as index 3:
    lst = ["Welcome", "to", "Data", "course"]
    lst.insert(3, 'Big')
    # Output -> ["Welcome", "to", "Data", "Big", "course"]

    # Reading it as the 3rd position (index 2). The list is re-created first,
    # otherwise 'Big' would be inserted a second time.
    lst = ["Welcome", "to", "Data", "course"]
    lst.insert(2, 'Big')
    # Output -> ["Welcome", "to", "Big", "Data", "course"]

Q40. What is a tuple? How is it different from list?

> Tuple is another type of sequential datatype. We can store different types of data inside it.
> The difference between Tuple & List is, List is mutable & Tuple is immutable.

Q41. How can you create a tuple in Python

    # Using () brackets
	tup1 = (1, 2, 'Big', 'Data', 2.0)

Q42. Create a tuple and try to add your name in the tuple. Are you able to do it? Support your answer with reason.

> We cannot add data after creating the Tuple as tuples are immutable.
> The only way to update the tuple is to overwrite the entire tuple with required changes.

Q43. Can two tuple be appended. If yes, write a code for it. If not, why?

> We can combine two tuples using + operator.

    tup1 = (1, 2, 3, 4)
    tup2 = (5, 6, 7, 8)
    # Tuples are immutable, so + builds a brand-new tuple; tup1 and tup2 are unchanged.
    combined_tuple = tup1 + tup2
    print(combined_tuple)

Q44. Take a tuple as an input and print the count of elements in it.

    # ast.literal_eval parses a typed literal such as (1, 2, 3) or 1, 2, 3
    # without executing arbitrary code, which eval() would do.
    import ast
    tup1 = ast.literal_eval(input('Enter the tuple elements:'))
    print('Length of the tuple is:', len(tup1))

Q45. What are sets in Python?

> Set is a data type in Python which is an unordered collection of unique elements. It is not indexed.

Q46. How can you create a set?

    # Method 1
	s1 = {1, 2, 3}

    # Method 2
	s2 = set([1,2,3])

Q47. Create a set and add "iNeuron" in your set.

    s1 = {'Big', 'Data', 'Engineer'}
	s1.add('iNeuron')

Q48. Try to add multiple values using add() function.

> We cannot add multiple values using add() function. It accepts exactly one element, so a call such as s1.add(1, 2) raises a TypeError.

Q49. How is update() different from add()?

> add() inserts a single element, while update() can add multiple elements to a set when an iterable such as a list, tuple, string or another set is provided to it as an argument.

Q50. What is clear() in sets?

> clear() function is used to delete all elements in set & make it empty.

Q51. What is frozen set?

> Frozen set (frozenset) is a datatype in Python. It is similar to a set except that it is immutable.

Q52. How is frozen set different from set?

> Set is mutable while frozen set is immutable.

Q53. What is union() in sets? Explain via code.

> union() in set is used to combine the elements of two different sets.

    s1 = {'Big', 'Data', 'Engineer'}
    s2 = {1, 2, 3}
    # union() returns a new set; s1 and s2 themselves are not modified.
    combined = s1.union(s2)
    print(combined)
    # Output -> {1, 2, 3, 'Data', 'Engineer', 'Big'}  (element order may vary)

Q54. What is intersection() in sets? Explain via code.

> intersection() in sets is used to get only those elements which are present in both the sets.

    s1 = {'Data', 'Engineer', 'Software', 'Developer'}
    s2 = {'Software', 'Engineer', 'Data', 'Science'}
    # intersection() returns a new set; s1 and s2 themselves are not modified.
    common = s1.intersection(s2)
    print(common)
    # Output -> {'Data', 'Software', 'Engineer'}  (element order may vary)

Q55. What is dictionary in Python?

> dictionary data type in Python is used to store data in the form of key-value pairs.

Q56. How is dictionary different from all other data structures.

> dictionary stores data in key-value pair. While most of the sequential data types use indexes to access data dictionary uses keys.

Q57. How can we declare a dictionary in Python?

    # Empty dictionary
	dict1 = dict()
	dict2 = {}

    # Dictionary with elements
	dict3 = {'name':'Vivek', 'age':23, 'city':'Pune'}

Q58. What will the output of the following?

```
var = {}
print(type(var))
```

> `<class 'dict'>`

Q59. How can we add an element in a dictionary?

    dict1 = dict()

    # Method 1
	dict1.update({'name':'Vivek'})

    # Method 2
	dict1['age'] = 23

    print(dict1)
	# Output -> {'name': 'Vivek', 'age': 23}

Q60. Create a dictionary and access all the values in that dictionary.

    dict1 = {'name':'Vivek', 'age':23, 'city':'Pune'}
	print(dict1.values())
	# Output -> dict_values(['Vivek', 23, 'Pune'])

Q61. Create a nested dictionary and access all the element in the inner dictionary.

    dict1 = {'name':'Vivek', 'age':23, 'city':'Pune', 'skills': {'language':'Python', 'database':'MySQL'}}
	print(dict1.get('skills'))
	# Output -> {'language':'Python', 'database':'MySQL'}

Q62. What is the use of get() function?

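> get() function is used to get the value corresponding to the key given as an argument. If the key is not present it returns None (or the default value passed as the second argument) instead of raising a KeyError.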

Q63. What is the use of items() function?

> items() function is used to get a view of the (key, value) tuples of a dictionary. Wrap it in list() if an actual list is needed.

Q64. What is the use of pop() function?

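> pop() function is used to get the value corresponding to the key given as an argument. It also deletes that key-value pair from the dictionary. If the key is not present it raises a KeyError unless a default value is passed as the second argument.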

Q65. What is the use of popitem() function?

> popitem() removes the last inserted key-value pair from a dictionary and returns it as a (key, value) tuple.

Q66. What is the use of keys() function?

> keys() function is used to get all the keys from a dictionary.

Q67. What is the use of values() function?

> values() function is used to get all the values from a dictionary.

Q68. What are loops in Python?

> Loops in Python are used to execute a block of code repeatedly, either once for each item of a sequence or for as long as a condition remains True.

Q69. How many type of loop are there in Python?

> In Python there are two types of loops

- for loop
- while loop

Q70. What is the difference between for and while loops?

> Mostly we use 'for loop' for a particular range and we use 'while loop' until a particular condition is True.

Q71. What is the use of continue statement?

> continue statement is used to skip the rest of the current iteration of the loop & move to the next iteration.

Q72. What is the use of break statement?

> break statement is used to exit from the loop.

Q73. What is the use of pass statement?

> pass is a null statement: it does nothing when executed. It is used as a placeholder where a statement is syntactically required, e.g. in the body of a function, class or loop that we plan to develop in future.

Q74. What is the use of range() function?

> range() function generates a sequence of integers from a start value (default 0) up to, but not including, a stop value, with an optional step. e.g. range(2, 10, 2) -> 2, 4, 6, 8

Q75. How can you loop over a dictionary?

> We can use items() function to loop through a dictionary.

    dict1 = {'name':'Vivek', 'age':23, 'city':'Pune', 'skills': {'language':'Python', 'database':'MySQL'}}
	for k,v in dict1.items():
	    print('Key:', k, 'Value:', v)

### Coding problems

Q76. Write a Python program to find the factorial of a given number.

    def fact(num):
        """Return the factorial of a non-negative integer ``num``.

        fact(0) and fact(1) are both 1. A ValueError is raised for negative
        input because the factorial is not defined there.
        """
        if num < 0:
            raise ValueError('factorial is not defined for negative numbers')
        result = 1
        # Multiplying by 1 changes nothing, so the loop starts at 2.
        for n in range(2, num + 1):
            result *= n
        return result

    input_number = int(input('Enter a number: '))
	factorial = fact(input_number)
	print('Factorial of', input_number, 'is', factorial)

Q77. Write a Python program to calculate the simple interest. Formula to calculate simple interest is SI = (P*R*T)/100

    # float() accepts fractional amounts and rates (e.g. 7.5), which int() rejects.
    p = float(input('Enter principal amount: '))
    r = float(input('Enter annual interest rate: '))
    t = float(input('Enter time (in years): '))

    # SI = (P * R * T) / 100
    simple_interest = (p * r * t) / 100
    print('Simple interest:', simple_interest)

Q78. Write a Python program to calculate the compound interest. Formula of compound interest is A = P(1+ R/100)^t.

    # float() accepts fractional amounts and rates (e.g. 7.5), which int() rejects.
    p = float(input('Enter principal amount: '))
    r = float(input('Enter annual interest rate: '))
    t = float(input('Enter time (in years): '))

    # A = P(1 + R/100)^t is the total amount after t years;
    # the compound interest is what was earned on top of the principal.
    amount = p * (1 + r / 100) ** t
    compound_interest = amount - p
    print('Amount:', amount)
    print('Compound interest:', compound_interest)

Q79. Write a Python program to check if a number is prime or not.

    num = int(input('Enter a number: '))

    # Primes are integers greater than 1, so 0, 1 and negative numbers are rejected.
    if num < 2:
        print('The input number', num, 'is not a prime number')
    else:
        for n in range(2, num):
            if num % n == 0:
                print('The input number', num, 'is not a prime number')
                break
        else:
            # for-else: this branch runs only when the loop ended without break,
            # i.e. no divisor was found.
            print('The input number', num, 'is a prime number')

Q80. Write a Python program to check Armstrong Number.

    num = input('Enter a number: ').strip()

    # An Armstrong number equals the sum of its digits, each raised to the
    # power of the number of digits (3 only for three-digit numbers,
    # e.g. 153 = 1**3 + 5**3 + 3**3, but 9474 = 9**4 + 4**4 + 7**4 + 4**4).
    power = len(num)
    result = 0

    for n in num:
        result += int(n) ** power

    if int(num) == result:
        print(num, 'is an Armstrong number')
    else:
        print(num, 'is not an Armstrong number')

Q81. Write a Python program to find the n-th Fibonacci Number.

    num = int(input('Enter a number: '))

    # Only the last two values are needed, so no list is built.
    # After k steps `previous` holds F(k), with F(0) = 0 and F(1) = 1.
    previous, current = 0, 1
    for _ in range(num):
        previous, current = current, previous + current

    print('The nth Fibonacci number is', previous)

Q82. Write a Python program to interchange the first and last element in a list.

    lst = [1, 2, 3, 4, 5]
	lst[0], lst[-1] = lst[-1], lst[0]
	print(lst)
	# Output -> [5, 2, 3, 4, 1]

Q83. Write a Python program to swap two elements in a list.

    lst = [1, 2, 3, 4, 5]
	i1 = int(input('Enter 1st index to swap: '))
	i2 = int(input('Enter 2nd index to swap: '))
	lst[i1], lst[i2] = lst[i2], lst[i1]
	print(lst)

Q84. Write a Python program to find N largest element from a list.

    lst = [3, 5, 1, 2, 4]
	n = int(input('Enter the no. of largest numbers required: '))
	lst.sort(reverse=True)
	print(lst[0:n])

Q85. Write a Python program to find cumulative sum of a list.

    lst = [3, 5, 1, 2, 4]
    running_total = 0
    # Each element is overwritten with the sum of itself and everything before it.
    for position, value in enumerate(lst):
        running_total += value
        lst[position] = running_total

    print(lst)
    # Output -> [3, 8, 9, 11, 15]

Q86. Write a Python program to check if a string is palindrome or not.

    s = input('Enter a string: ')
	reverse_s = s[ : : -1]

    if s.upper() == reverse_s.upper():
	    print('The string is palindrome ')
	else:
	    print('The string is not palindrome ')

Q87. Write a Python program to remove i'th element from a string.

    s = input('Enter a string: ')
	i = int(input('Enter index of element to be removed: '))
	s = s[:i] + s[i+1:]
	print(s)

Q88. Write a Python program to check if a substring is present in a given string.

    s = input('Enter a string: ')
	substr = input('Enter sub string: ')
	if s.find(substr) == -1:
	    print('Substring not present')
	else:
	    print('Substring is present')

Q89. Write a Python program to find words which are greater than given length k.

    s = input('Enter a string: ')
    k = int(input('Enter desired length: '))

    # split() without arguments breaks the text on any run of whitespace.
    for word in s.split():
        if len(word) > k:
            print(word)

Q90. Write a Python program to extract unique dictionary values.

    test_dict = {'my': [1, 8, 9, 6], 'big': [10, 11, 9, 1], 'data': [6, 12, 10, 6], 'dict': [5, 2, 1]}
    # Flatten every value list into one list, then let set() drop the repeats.
    result = [item for values in test_dict.values() for item in values]
    print(list(set(result)))

Q91. Write a Python program to merge two dictionary.

    dict1 = {'a':1, 'b':2, 'c':3}
	dict2 = {'d':4, 'e':5, 'f':6}

    dict1.update(dict2)
	print(dict1)
	# Output -> {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6}

Q92. Write a Python program to convert a list of tuples into dictionary.

```
Input : [('Sachin', 10), ('MSD', 7), ('Kohli', 18), ('Rohit', 45)]
Output : {'Sachin': 10, 'MSD': 7, 'Kohli': 18, 'Rohit': 45}
```

    lst = [('Sachin', 10), ('MSD', 7), ('Kohli', 18), ('Rohit', 45)]
	my_dict = dict(lst)
	print(my_dict)
	# Output -> {'Sachin': 10, 'MSD': 7, 'Kohli': 18, 'Rohit': 45}

Q93. Write a Python program to create a list of tuples from given list having number and its cube in each tuple.

```
Input: list = [9, 5, 6]
Output: [(9, 729), (5, 125), (6, 216)]
```

    lst = [9, 5, 6]
    # One (number, cube) tuple per element, in the original order.
    result = [(num, num ** 3) for num in lst]

    print(result)
    # Output -> [(9, 729), (5, 125), (6, 216)]

Q94. Write a Python program to get all combinations of 2 tuples.

```
Input : test_tuple1 = (7, 2), test_tuple2 = (7, 8)
Output : [(7, 7), (7, 8), (2, 7), (2, 8), (7, 7), (7, 2), (8, 7), (8, 2)]
```

    test_tuple1 = (7, 2)
    test_tuple2 = (7, 8)

    # First every (tuple1 item, tuple2 item) pair, then every
    # (tuple2 item, tuple1 item) pair, matching the expected output order.
    result = [(a, b) for a in test_tuple1 for b in test_tuple2]
    result += [(b, a) for b in test_tuple2 for a in test_tuple1]

    print(result)
    # Output -> [(7, 7), (7, 8), (2, 7), (2, 8), (7, 7), (7, 2), (8, 7), (8, 2)]

Q95. Write a Python program to sort a list of tuples by second item.

```
Input : [('for', 24), ('Geeks', 8), ('Geeks', 30)] 
Output : [('Geeks', 8), ('for', 24), ('Geeks', 30)]
```

    lst = [('for', 24), ('Geeks', 8), ('Geeks', 30)]

    def second_item(tup):
        """Return the element at index 1 of ``tup``; used as the sort key."""
        return tup[1]

    # sorted() builds the ordered list, which is then bound back to lst.
    lst = sorted(lst, key=second_item)
    print(lst)
    # Output -> [('Geeks', 8), ('for', 24), ('Geeks', 30)]

Q96. Write a python program to print below pattern.

```
* 
* * 
* * * 
* * * * 
* * * * * 
```

    n = 5
	for i in range(1, n+1):
		print('* ' * i)

Q97. Write a python program to print below pattern.

```
    *
   **
  ***
 ****
*****
```

    n = 5
    for i in range(1, n + 1):
        # Join padding and stars with +; passing them to print() as two
        # arguments would insert an extra separator space before the stars.
        line = ' ' * (n - i) + '*' * i
        print(line)

Q98. Write a python program to print below pattern.

```
    * 
   * * 
  * * * 
 * * * * 
* * * * * 
```

    n = 5
    for i in range(1, n + 1):
        # Join padding and stars with +; passing them to print() as two
        # arguments would insert an extra separator space before the stars.
        line = ' ' * (n - i) + '* ' * i
        print(line)

Q99. Write a python program to print below pattern.

```
1 
1 2 
1 2 3 
1 2 3 4 
1 2 3 4 5
```

    n = 5
	for i in range(1, n+1):
		for j in range(1, i+1):
			print(j, end=' ')
		print()

Q100. Write a python program to print below pattern.

```
A 
B B 
C C C 
D D D D 
E E E E E 
```

    import string
    n = 5
    alpha = list(string.ascii_uppercase)
    # Row number `count` repeats the count-th letter of the alphabet `count` times.
    for count, letter in enumerate(alpha[:n], start=1):
        print((letter + ' ') * count)