In [3]:
# Load the provided CSV file 'data.csv' into a DataFrame named df.

import pandas as pd

df = pd.read_csv(filepath_or_buffer='data.csv')  # relative path: resolved against the current working directory
df


Unnamed: 0,Duration,Pulse,Maxpulse,Calories
0,60,110,130,409.1
1,60,117,145,479.0
2,60,103,135,340.0
3,45,109,175,282.4
4,45,117,148,406.0
...,...,...,...,...
164,60,105,140,290.8
165,60,110,145,300.0
166,60,115,145,310.2
167,75,120,150,320.4


In [4]:
# Basic statistical description of the data: describe() reports count, mean,
# std, min, the 25%/50%/75% quartiles and max for each numeric column.
df.describe()

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
count,169.0,169.0,169.0,164.0
mean,63.846154,107.461538,134.047337,375.790244
std,42.299949,14.510259,16.450434,266.379919
min,15.0,80.0,100.0,50.3
25%,45.0,100.0,124.0,250.925
50%,60.0,105.0,131.0,318.6
75%,60.0,111.0,141.0,387.6
max,300.0,159.0,184.0,1860.4


In [5]:
# Check for null values: one boolean per column, True when that column
# contains at least one missing entry.
df.isna().any()

Duration    False
Pulse       False
Maxpulse    False
Calories     True
dtype: bool

In [6]:
# Replace the null values in Calories with the column mean.
#
# The filled Series is assigned back to the column instead of calling
# fillna(..., inplace=True) on df['Calories']: that chained in-place form acts
# on an intermediate Series, raises a FutureWarning on pandas 2.x, and leaves
# df unchanged once Copy-on-Write is active (the default from pandas 3.0).
mean = df['Calories'].mean()  # Series.mean() skips NaN entries by default
df['Calories'] = df['Calories'].fillna(mean)

In [16]:
# Re-run the missing-value check after the fill; every column should now report False.
df.isna().any()

Duration    False
Pulse       False
Maxpulse    False
Calories    False
dtype: bool

In [7]:
# Aggregate Pulse, Maxpulse and Calories with min, max, count and mean.
# dict.fromkeys builds the {column: [functions]} mapping that DataFrame.agg
# accepts, so the list of functions is written only once.
df.agg(dict.fromkeys(['Pulse', 'Maxpulse', 'Calories'],
                     ['min', 'max', 'count', 'mean']))

Unnamed: 0,Pulse,Maxpulse,Calories
min,80.0,100.0,50.3
max,159.0,184.0,1860.4
count,169.0,169.0,169.0
mean,107.461538,134.047337,375.790244


In [8]:
# Select the rows whose Calories value lies between 500 and 1000.
# Series.between includes both endpoints by default, i.e. >= 500 and <= 1000.
df[df['Calories'].between(500, 1000)]

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
51,80,123,146,643.1
62,160,109,135,853.0
65,180,90,130,800.4
66,150,105,135,873.4
67,150,107,130,816.0
72,90,100,127,700.0
73,150,97,127,953.2
75,90,98,125,563.2
78,120,100,130,500.4
83,120,100,130,500.0


In [9]:
# Select the rows with Calories > 500 and Pulse < 100.
# .gt()/.lt() are the method forms of > and <; the two boolean masks are
# combined element-wise with '&' and used as a row selector.
df.loc[df['Calories'].gt(500) & df['Pulse'].lt(100)]

Unnamed: 0,Duration,Pulse,Maxpulse,Calories
65,180,90,130,800.4
70,150,97,129,1115.0
73,150,97,127,953.2
75,90,98,125,563.2
99,90,93,124,604.1
103,90,90,100,500.4
106,180,90,120,800.3
108,90,90,120,500.3


In [10]:
# Create a new "df_modified" dataframe that contains all the columns from df
# except "Maxpulse".
#
# Dropping the column by name states that intent directly and keeps whatever
# other columns df has, instead of relying on a hand-written list of columns
# to keep. drop() returns a new DataFrame, so df itself is not modified, and
# errors='ignore' keeps the cell re-runnable once "Maxpulse" is gone from df.
df_modified = df.drop(columns='Maxpulse', errors='ignore')
df_modified

Unnamed: 0,Duration,Pulse,Calories
0,60,110,409.1
1,60,117,479.0
2,60,103,340.0
3,45,109,282.4
4,45,117,406.0
...,...,...,...
164,60,105,290.8
165,60,110,300.0
166,60,115,310.2
167,75,120,320.4


In [11]:
# Delete the "Maxpulse" column from the main df dataframe.
#
# DataFrame.pop raises KeyError when the column no longer exists, so running
# this cell a second time failed. drop(..., errors='ignore') removes the column
# when it is present and does nothing otherwise, keeping the cell re-runnable.
df = df.drop(columns='Maxpulse', errors='ignore')
df

Unnamed: 0,Duration,Pulse,Calories
0,60,110,409.1
1,60,117,479.0
2,60,103,340.0
3,45,109,282.4
4,45,117,406.0
...,...,...,...
164,60,105,290.8
165,60,110,300.0
166,60,115,310.2
167,75,120,320.4


In [12]:
# Convert the Calories column to an integer dtype; astype with a
# {column: dtype} mapping casts only the listed column and leaves the rest as is.
# NOTE(review): builtin int resolves to the platform's default integer width
# (the recorded output below shows int32) - pass 'int64' explicitly if a fixed
# width is required.
df = df.astype({'Calories': int})
df.dtypes

Duration    int64
Pulse       int64
Calories    int32
dtype: object