# MODULE - 1 : Importing Datasets

### Importing and Exporting Data in Python

In [2]:
# pip install pandas

import pandas as pd

# Raw data file (CSV-like) for the UCI Automobile dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.data"

# Column names from the dataset's .names file
cols = [
    'symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
    'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
    'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type',
    'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',
    'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg',
    'price'
]

# The dataset uses '?' for missing values
df = pd.read_csv(url, header=None, names=cols, na_values='?')

print(df.head())


   symboling  normalized-losses         make fuel-type aspiration  \
0          3                NaN  alfa-romero       gas        std   
1          3                NaN  alfa-romero       gas        std   
2          1                NaN  alfa-romero       gas        std   
3          2              164.0         audi       gas        std   
4          2              164.0         audi       gas        std   

  num-of-doors   body-style drive-wheels engine-location  wheel-base  ...  \
0          two  convertible          rwd           front        88.6  ...   
1          two  convertible          rwd           front        88.6  ...   
2          two    hatchback          rwd           front        94.5  ...   
3         four        sedan          fwd           front        99.8  ...   
4         four        sedan          4wd           front        99.4  ...   

   engine-size  fuel-system  bore  stroke compression-ratio horsepower  \
0          130         mpfi  3.47    2.68       

### Accessing Databases with Python

In [None]:
# Writing code using DB-API , example given by Coursera video "Accessing Databases with Python"

from dmoudule import connect   
# here dmoudule isn’t a real package—it’s a typo/placeholder , 
# In Python’s DB-API you import a real database driver, e.g.:-
# SQLite: import sqlite3 (built-in)
# PostgreSQL: import psycopg2
# MySQL: import mysql.connector or import pymysql
# SQL Server: import pyodbc

# Create connection object
connection = connect('database', 'username','pswd')

# Create cursor object
cursor = connection.cursor()

# Run queries
cursor.execute('select * from mytable')
results = cursor.fetchall()

# free resources
cursor.close()
connection.close()



ModuleNotFoundError: No module named 'dmoudule'

### Read Data

We utilize the pandas.read_csv() function for reading CSV files. However, in this version of the lab, which operates on JupyterLite, the dataset needs to be downloaded to the interface using the provided code below. 
The functions below will download the dataset into your browser:

In [None]:
# below version is for JupyterLite browser , refer to below sync and async version of code.

# import pandas library
import pandas as pd
import numpy as np
import requests

# from pyodide.http import pyfetch

async def download(url, filename):
    response = await pyfetch(url)
    if response.status == 200:
        with open(filename, "wb") as f:
            f.write(await response.bytes())
            
file_path='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-SkillsNetwork/labs/Data%20files/auto.csv'

# To obtain the dataset, utilize the download() function as defined above:
await download(file_path, "auto.csv")
file_name="auto.csv"
df = pd.read_csv(file_name)


NameError: name 'pyfetch' is not defined

✅ Simple (sync) — just run this one cell

In [None]:
import pandas as pd
import requests

file_url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-SkillsNetwork/labs/Data%20files/auto.csv"
dest = "auto.csv"

# Download
r = requests.get(file_url)
r.raise_for_status()
with open(dest, "wb") as f:
    f.write(r.content)

# Read the CSV
df = pd.read_csv(dest)
df.head()


Unnamed: 0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
1,1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
2,2,164,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
3,2,164,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450
4,2,?,audi,gas,std,two,sedan,fwd,front,99.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,15250


(Optional) Async version that keeps await

In [19]:
# %pip install aiohttp

import pandas as pd
import aiohttp, asyncio

async def download(url, filename):
    async with aiohttp.ClientSession() as s:
        async with s.get(url) as resp:
            resp.raise_for_status()
            with open(filename, "wb") as f:
                while True:
                    chunk = await resp.content.read(8192)
                    if not chunk:
                        break
                    f.write(chunk)

file_path = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-SkillsNetwork/labs/Data%20files/auto.csv"
await download(file_path, "auto.csv")

df = pd.read_csv("auto.csv")
df.head()


Unnamed: 0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
1,1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
2,2,164,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
3,2,164,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450
4,2,?,audi,gas,std,two,sedan,fwd,front,99.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,15250


Question #1:
Check the bottom 10 rows of data frame "df".

In [21]:
# Check the bottom 10 rows of data frame "df".

print("The last 10 rows of the dataframe\n")
df.tail(10)

The last 10 rows of the dataframe



Unnamed: 0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495
194,-1,74,volvo,gas,std,four,wagon,rwd,front,104.3,...,141,mpfi,3.78,3.15,9.5,114,5400,23,28,13415
195,-2,103,volvo,gas,std,four,sedan,rwd,front,104.3,...,141,mpfi,3.78,3.15,9.5,114,5400,24,28,15985
196,-1,74,volvo,gas,std,four,wagon,rwd,front,104.3,...,141,mpfi,3.78,3.15,9.5,114,5400,24,28,16515
197,-2,103,volvo,gas,turbo,four,sedan,rwd,front,104.3,...,130,mpfi,3.62,3.15,7.5,162,5100,17,22,18420
198,-1,74,volvo,gas,turbo,four,wagon,rwd,front,104.3,...,130,mpfi,3.62,3.15,7.5,162,5100,17,22,18950
199,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845
200,-1,95,volvo,gas,turbo,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045
201,-1,95,volvo,gas,std,four,sedan,rwd,front,109.1,...,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485
202,-1,95,volvo,diesel,turbo,four,sedan,rwd,front,109.1,...,145,idi,3.01,3.4,23.0,106,4800,26,27,22470
203,-1,95,volvo,gas,turbo,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,9.5,114,5400,19,25,22625


### Add Headers

Take a look at the data set. Pandas automatically set the header with an integer starting from 0.

To better describe the data, you can introduce a header. This information is available at: https://archive.ics.uci.edu/ml/datasets/Automobile.

Thus, you have to add headers manually.

First, create a list "headers" that include all column names in order. Then, use dataframe.columns = headers to replace the headers with the list you created. 

In [22]:
# create headers list
headers = ["symboling","normalized-losses","make","fuel-type","aspiration", "num-of-doors","body-style",
         "drive-wheels","engine-location","wheel-base", "length","width","height","curb-weight","engine-type",
         "num-of-cylinders", "engine-size","fuel-system","bore","stroke","compression-ratio","horsepower",
         "peak-rpm","city-mpg","highway-mpg","price"]
print("headers\n", headers)

headers
 ['symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration', 'num-of-doors', 'body-style', 'drive-wheels', 'engine-location', 'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type', 'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg', 'price']


In [23]:
# Replace headers and recheck our data frame:
df.columns = headers
df.columns

Index(['symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
       'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
       'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type',
       'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',
       'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg',
       'highway-mpg', 'price'],
      dtype='object')

In [24]:
# You can also see the first 10 entries of the updated data frame and note that the headers are updated.
df.head(10)

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,?,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
1,1,?,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
2,2,164,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
3,2,164,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450
4,2,?,audi,gas,std,two,sedan,fwd,front,99.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,15250
5,1,158,audi,gas,std,four,sedan,fwd,front,105.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
6,1,?,audi,gas,std,four,wagon,fwd,front,105.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920
7,1,158,audi,gas,turbo,four,sedan,fwd,front,105.8,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
8,0,?,audi,gas,turbo,two,hatchback,4wd,front,99.5,...,131,mpfi,3.13,3.4,7.0,160,5500,16,22,?
9,2,192,bmw,gas,std,two,sedan,rwd,front,101.2,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430


In [26]:
# Now, we need to replace the "?" symbol with NaN so the dropna() can remove the missing values:
df1=df.replace('?', np.nan)

In [None]:
# You can drop missing values along the column "price" as follows:
df=df1.dropna(subset=["price"], axis=0)   # Here, axis=0 means that the contents along the entire row will be dropped wherever the entity 'price' is found to be NaN
df.head(20)

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
0,3,,alfa-romero,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500
1,1,,alfa-romero,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500
2,2,164.0,audi,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950
3,2,164.0,audi,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450
4,2,,audi,gas,std,two,sedan,fwd,front,99.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,15250
5,1,158.0,audi,gas,std,four,sedan,fwd,front,105.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,17710
6,1,,audi,gas,std,four,wagon,fwd,front,105.8,...,136,mpfi,3.19,3.4,8.5,110,5500,19,25,18920
7,1,158.0,audi,gas,turbo,four,sedan,fwd,front,105.8,...,131,mpfi,3.13,3.4,8.3,140,5500,17,20,23875
9,2,192.0,bmw,gas,std,two,sedan,rwd,front,101.2,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16430
10,0,192.0,bmw,gas,std,four,sedan,rwd,front,101.2,...,108,mpfi,3.5,2.8,8.8,101,5800,23,29,16925


Question2: Find the name of the columns of the dataframe.

In [29]:
print(df.columns)

Index(['symboling', 'normalized-losses', 'make', 'fuel-type', 'aspiration',
       'num-of-doors', 'body-style', 'drive-wheels', 'engine-location',
       'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-type',
       'num-of-cylinders', 'engine-size', 'fuel-system', 'bore', 'stroke',
       'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg',
       'highway-mpg', 'price'],
      dtype='object')


Save Dataset

Correspondingly, Pandas enables you to save the data set to CSV. By using the dataframe.to_csv() method, you can add the file path and name along with quotation marks in the brackets.

For example, if you save the data frame df as automobile.csv to your local machine, you may use the syntax below, where index = False means the row names will not be written. 

In [30]:
df.to_csv("automobile.csv", index=False)

In [33]:
# check the data type of data frame "df" by .dtypes
print(df.dtypes)

symboling              int64
normalized-losses     object
make                  object
fuel-type             object
aspiration            object
num-of-doors          object
body-style            object
drive-wheels          object
engine-location       object
wheel-base           float64
length               float64
width                float64
height               float64
curb-weight            int64
engine-type           object
num-of-cylinders      object
engine-size            int64
fuel-system           object
bore                  object
stroke                object
compression-ratio    float64
horsepower            object
peak-rpm              object
city-mpg               int64
highway-mpg            int64
price                 object
dtype: object


In [34]:
# This shows the statistical summary of all numeric-typed (int, float) columns.
df.describe()

Unnamed: 0,symboling,wheel-base,length,width,height,curb-weight,engine-size,compression-ratio,city-mpg,highway-mpg
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.83,98.848,174.228,65.898,53.7915,2555.705,126.86,10.1701,25.2,30.705
std,1.248557,6.038261,12.347132,2.102904,2.428449,518.594552,41.650501,4.014163,6.432487,6.827227
min,-2.0,86.6,141.1,60.3,47.8,1488.0,61.0,7.0,13.0,16.0
25%,0.0,94.5,166.675,64.175,52.0,2163.0,97.75,8.575,19.0,25.0
50%,1.0,97.0,173.2,65.5,54.1,2414.0,119.5,9.0,24.0,30.0
75%,2.0,102.4,183.5,66.675,55.525,2928.25,142.0,9.4,30.0,34.0
max,3.0,120.9,208.1,72.0,59.8,4066.0,326.0,23.0,49.0,54.0


In [35]:
# describe all the columns in "df" 
df.describe(include = "all")

Unnamed: 0,symboling,normalized-losses,make,fuel-type,aspiration,num-of-doors,body-style,drive-wheels,engine-location,wheel-base,...,engine-size,fuel-system,bore,stroke,compression-ratio,horsepower,peak-rpm,city-mpg,highway-mpg,price
count,200.0,164.0,200,200,200,198,200,200,200,200.0,...,200.0,200,196.0,196.0,200.0,198.0,198.0,200.0,200.0,200.0
unique,,51.0,22,2,2,2,5,3,2,,...,,8,38.0,36.0,,58.0,22.0,,,185.0
top,,161.0,toyota,gas,std,four,sedan,fwd,front,,...,,mpfi,3.62,3.4,,68.0,5500.0,,,16500.0
freq,,11.0,32,180,164,113,94,118,197,,...,,91,23.0,19.0,,19.0,36.0,,,2.0
mean,0.83,,,,,,,,,98.848,...,126.86,,,,10.1701,,,25.2,30.705,
std,1.248557,,,,,,,,,6.038261,...,41.650501,,,,4.014163,,,6.432487,6.827227,
min,-2.0,,,,,,,,,86.6,...,61.0,,,,7.0,,,13.0,16.0,
25%,0.0,,,,,,,,,94.5,...,97.75,,,,8.575,,,19.0,25.0,
50%,1.0,,,,,,,,,97.0,...,119.5,,,,9.0,,,24.0,30.0,
75%,2.0,,,,,,,,,102.4,...,142.0,,,,9.4,,,30.0,34.0,


Question #3:

You can select the columns of a dataframe by indicating the name of each column. For example, you can select the three columns as follows:

dataframe[[' column 1 ',column 2', 'column 3']]

Where "column" is the name of the column, you can apply the method ".describe()" to get the statistics of those columns as follows:

dataframe[[' column 1 ',column 2', 'column 3'] ].describe()

Apply the method to ".describe()" to the columns 'length' and 'compression-ratio'.


In [36]:
df[['length', 'compression-ratio']].describe()

Unnamed: 0,length,compression-ratio
count,200.0,200.0
mean,174.228,10.1701
std,12.347132,4.014163
min,141.1,7.0
25%,166.675,8.575
50%,173.2,9.0
75%,183.5,9.4
max,208.1,23.0


In [39]:
# Info , You can also use another method to check your data set: 
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 200 entries, 0 to 203
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          200 non-null    int64  
 1   normalized-losses  164 non-null    object 
 2   make               200 non-null    object 
 3   fuel-type          200 non-null    object 
 4   aspiration         200 non-null    object 
 5   num-of-doors       198 non-null    object 
 6   body-style         200 non-null    object 
 7   drive-wheels       200 non-null    object 
 8   engine-location    200 non-null    object 
 9   wheel-base         200 non-null    float64
 10  length             200 non-null    float64
 11  width              200 non-null    float64
 12  height             200 non-null    float64
 13  curb-weight        200 non-null    int64  
 14  engine-type        200 non-null    object 
 15  num-of-cylinders   200 non-null    object 
 16  engine-size        200 non-null

### Hands-on Practice Lab: Importing Dataset - Laptops Pricing

Estimated time needed: 20 minutes

In this lab, you will practice the process of loading and drawing basic insights on a dataset as learnt through the module. You are being provided with a fresh dataset on 'Laptop Pricing' which will be used for all the practice labs throughout the course.

Objectives

After completing this lab you will be able to:

    Import a dataset from a CSV file to a Pandas dataframe
    Develop some basic insights about the dataset

Setup

For this lab, we will be using the following libraries:

    skillsnetwork for downloading the daataset

    pandas for managing the data.

    numpy for mathematical operations.

In [42]:
# Importing Required Libraries
import pandas as pd
import numpy as np

%pip install aiohttp

import aiohttp
import asyncio

async def download(url, filename):
    async with aiohttp.ClientSession() as s:
        async with s.get(url) as resp:
            resp.raise_for_status()
            with open(filename, "wb") as f:
                while True:
                    chunk = await resp.content.read(8192)
                    if not chunk:
                        break
                    f.write(chunk)           
                

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [44]:
fp = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-Coursera/laptop_pricing_dataset_base.csv"

In [46]:
await download(fp, "laptops.csv")
file_name="laptops.csv"

In [47]:
df = pd.read_csv(file_name)

Task #1:
Load the dataset to a pandas dataframe named 'df'
Print the first 5 entries of the dataset to confirm loading. 

In [49]:
df = pd.read_csv(file_name, header=None)
print(df.head())

     0   1          2   3   4   5       6    7   8    9     10    11
0  Acer   4  IPS Panel   2   1   5   35.56  1.6   8  256   1.6   978
1  Dell   3    Full HD   1   1   3  39.624  2.0   4  256   2.2   634
2  Dell   3    Full HD   1   1   7  39.624  2.7   8  256   2.2   946
3  Dell   4  IPS Panel   2   1   5  33.782  1.6   8  128  1.22  1244
4    HP   4    Full HD   2   1   7  39.624  1.8   8  256  1.91   837


Task #2:
Add headers to the dataframe
The headers for the dataset, in sequence, are "Manufacturer", "Category", "Screen", "GPU", "OS", "CPU_core", "Screen_Size_inch", "CPU_frequency", "RAM_GB", "Storage_GB_SSD", "Weight_kg" and "Price".

Confirm insertion by printing the first 10 rows of the dataset.

In [50]:
# create headers list
headers = ["Manufacturer", "Category", "Screen", "GPU", "OS", "CPU_core", "Screen_Size_inch", "CPU_frequency", "RAM_GB", "Storage_GB_SSD", "Weight_kg", "Price"]
df.columns = headers
print(df.head(10))

  Manufacturer  Category     Screen  GPU  OS  CPU_core Screen_Size_inch  \
0         Acer         4  IPS Panel    2   1         5            35.56   
1         Dell         3    Full HD    1   1         3           39.624   
2         Dell         3    Full HD    1   1         7           39.624   
3         Dell         4  IPS Panel    2   1         5           33.782   
4           HP         4    Full HD    2   1         7           39.624   
5         Dell         3    Full HD    1   1         5           39.624   
6           HP         3    Full HD    3   1         5           39.624   
7         Acer         3  IPS Panel    2   1         5             38.1   
8         Dell         3    Full HD    1   1         5           39.624   
9         Acer         3  IPS Panel    3   1         7             38.1   

   CPU_frequency  RAM_GB  Storage_GB_SSD Weight_kg  Price  
0            1.6       8             256       1.6    978  
1            2.0       4             256       2.2    

Task #3:
Replace '?' with 'NaN'
Replace the '?' entries in the dataset with NaN value, recevied from the Numpy package. 

In [51]:
df.replace('?',np.nan, inplace = True)

Task #4:
Print the data types of the dataframe columns
Make a note of the data types of the different columns of the dataset. 

In [52]:
print(df.dtypes)

Manufacturer         object
Category              int64
Screen               object
GPU                   int64
OS                    int64
CPU_core              int64
Screen_Size_inch     object
CPU_frequency       float64
RAM_GB                int64
Storage_GB_SSD        int64
Weight_kg            object
Price                 int64
dtype: object


Task #5:
Print the statistical description of the dataset, including that of 'object' data types.

In [53]:
print(df.describe(include='all'))

       Manufacturer    Category   Screen         GPU          OS    CPU_core  \
count           238  238.000000      238  238.000000  238.000000  238.000000   
unique           11         NaN        2         NaN         NaN         NaN   
top            Dell         NaN  Full HD         NaN         NaN         NaN   
freq             71         NaN      161         NaN         NaN         NaN   
mean            NaN    3.205882      NaN    2.151261    1.058824    5.630252   
std             NaN    0.776533      NaN    0.638282    0.235790    1.241787   
min             NaN    1.000000      NaN    1.000000    1.000000    3.000000   
25%             NaN    3.000000      NaN    2.000000    1.000000    5.000000   
50%             NaN    3.000000      NaN    2.000000    1.000000    5.000000   
75%             NaN    4.000000      NaN    3.000000    1.000000    7.000000   
max             NaN    5.000000      NaN    3.000000    2.000000    7.000000   

       Screen_Size_inch  CPU_frequency 

Task #6:
Print the summary information of the dataset.

In [3]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   symboling          205 non-null    int64  
 1   normalized-losses  164 non-null    float64
 2   make               205 non-null    object 
 3   fuel-type          205 non-null    object 
 4   aspiration         205 non-null    object 
 5   num-of-doors       203 non-null    object 
 6   body-style         205 non-null    object 
 7   drive-wheels       205 non-null    object 
 8   engine-location    205 non-null    object 
 9   wheel-base         205 non-null    float64
 10  length             205 non-null    float64
 11  width              205 non-null    float64
 12  height             205 non-null    float64
 13  curb-weight        205 non-null    int64  
 14  engine-type        205 non-null    object 
 15  num-of-cylinders   205 non-null    object 
 16  engine-size        205 non

x-x-x-x-x-End of Module-1-x-x-x-x--x