In [11]:
import requests
import pandas as pd
from io import StringIO


In [12]:
url = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DA0101EN-Coursera/laptop_pricing_dataset_mod1.csv"


# Send a GET request to the URL. An explicit timeout is passed because
# requests waits indefinitely by default, which would hang the cell on a
# stalled connection.
response = requests.get(url, timeout=30)

# Stop here on anything other than 200. Merely printing the failure would
# leave `df` undefined (fresh kernel) or stale (earlier run), and every later
# cell would then fail confusingly or silently operate on old data.
if response.status_code != 200:
    raise RuntimeError(f"Failed to download the file. Status code: {response.status_code}")

# Use StringIO to treat the downloaded text as a file-like object for pandas
csv_data = StringIO(response.text)

# Load the CSV content into a DataFrame
df = pd.read_csv(csv_data)

# Display the first few rows of the DataFrame
print(df.head())

   Unnamed: 0 Manufacturer  Category     Screen  GPU  OS  CPU_core  \
0           0         Acer         4  IPS Panel    2   1         5   
1           1         Dell         3    Full HD    1   1         3   
2           2         Dell         3    Full HD    1   1         7   
3           3         Dell         4  IPS Panel    2   1         5   
4           4           HP         4    Full HD    2   1         7   

   Screen_Size_cm  CPU_frequency  RAM_GB  Storage_GB_SSD  Weight_kg  Price  
0          35.560            1.6       8             256       1.60    978  
1          39.624            2.0       4             256       2.20    634  
2          39.624            2.7       8             256       2.20    946  
3          33.782            1.6       8             128       1.22   1244  
4          39.624            1.8       8             256       1.91    837  


In [13]:
# Count the null entries in every column of df
missing_values = df.isna().sum()

# Keep only the columns whose null count is greater than zero
columns_with_missing_values = missing_values.loc[missing_values.gt(0)]

# Report the affected columns together with their null counts
print("Columns with missing values:", columns_with_missing_values, sep="\n")


Columns with missing values:
Screen_Size_cm    4
Weight_kg         5
dtype: int64


In [14]:
# prompt: Write a Python code to replace the missing values in a pandas data frame, per the following guidelines.
# 1. For a categorical attribute "Screen_Size_cm", replace the missing values with the most frequent value in the column.
# 2. For a continuous value attribute "Weight_kg", replace the missing values with the mean value of the entries in the column.

# NOTE: the filled Series is assigned back to the column instead of calling
# df[col].fillna(..., inplace=True). The in-place form operates on a column
# selection (chained assignment): it raises a FutureWarning in pandas 2.x and
# leaves df unchanged once Copy-on-Write is enabled.

# Replace missing values in 'Screen_Size_cm' with the most frequent value.
# mode() returns a Series of the most frequent value(s); the first is used.
most_frequent_screen_size = df['Screen_Size_cm'].mode()[0]
df['Screen_Size_cm'] = df['Screen_Size_cm'].fillna(most_frequent_screen_size)

# Replace missing values in 'Weight_kg' with the mean value.
# mean() skips NaN entries by default, so only observed weights contribute.
mean_weight = df['Weight_kg'].mean()
df['Weight_kg'] = df['Weight_kg'].fillna(mean_weight)


In [15]:
# prompt: Write a Python code to modify the contents under the following attributes of the data frame as required.
# 1. Data under 'Screen_Size_cm' is assumed to be in centimeters. Convert this data into inches. Modify the name of the attribute to 'Screen_Size_inch'.
# 2. Data under 'Weight_kg' is assumed to be in kilograms. Convert this data into pounds. Modify the name of the attribute to 'Weight_pounds'.

# Multiplicative conversion factors applied below
INCHES_PER_CM = 0.393701
POUNDS_PER_KG = 2.20462

# Add the converted columns (appended at the end, in this order), then remove
# the metric originals in a single chained expression.
df = (
    df.assign(
        Screen_Size_inch=df['Screen_Size_cm'] * INCHES_PER_CM,
        Weight_pounds=df['Weight_kg'] * POUNDS_PER_KG,
    )
    .drop(columns=['Screen_Size_cm', 'Weight_kg'])
)


In [16]:
# prompt: Write a Python code to normalize the content under the attribute "CPU_frequency" in a data frame df concerning its maximum value. Make changes to the original data, and do not create a new attribute.

# Largest value in 'CPU_frequency'; used as the scaling denominator
max_cpu_frequency = df['CPU_frequency'].max()

# Overwrite the column with each entry divided by that maximum
df['CPU_frequency'] = df['CPU_frequency'].div(max_cpu_frequency)


In [17]:
# prompt: Write a Python code to perform the following tasks.
# 1. Convert a data frame df attribute "Screen", into indicator variables, saved as df1, with the naming convention "Screen_<unique value of the attribute>".
# 2. Append df1 into the original data frame df.
# 3. Drop the original attribute from the data frame df.

# pandas is already imported as `pd` in the notebook's first cell, so it is
# not re-imported here.

# Build one indicator column per distinct 'Screen' value; prefix='Screen'
# yields column names of the form "Screen_<value>".
df1 = pd.get_dummies(df['Screen'], prefix='Screen')

# Remove the original 'Screen' column and append the indicator columns
# side by side (axis=1), aligned on the shared row index.
df = pd.concat([df.drop(columns='Screen'), df1], axis=1)


In [18]:
# prompt: Generate Python code that converts the values under the 'Price' column from USD to Euros.
# Assume that the current exchange rate is 1 USD = 0.90 Euros.
# The code should:
# 1. Create a new column named 'Price_Euros'.
# 2. Populate this new column with the converted values from the 'Price' column.
# 3. Round the converted values to two decimal places.
# 4. Do not modify the original 'Price' column.

# Exchange rate assumed by the prompt above: 1 USD = 0.90 EUR
usd_to_eur_rate = 0.90

# New 'Price_Euros' column: converted price rounded to two decimals.
# 'Price' is only read here, never reassigned.
df['Price_Euros'] = df['Price'].mul(usd_to_eur_rate).round(2)


In [19]:
# prompt: Modify the normalization prompt to perform min-max normalization on the CPU_frequency parameter

# NOTE: an earlier cell already divided 'CPU_frequency' by its maximum, so the
# extremes below are taken from that rescaled column, not the raw frequencies.

# Smallest and largest values currently in 'CPU_frequency'
min_cpu_frequency = df['CPU_frequency'].min()
max_cpu_frequency = df['CPU_frequency'].max()

# Width of the observed value range; the min-max denominator
cpu_frequency_range = max_cpu_frequency - min_cpu_frequency

# Min-max normalization: shift by the minimum, then divide by the range
df['CPU_frequency'] = df['CPU_frequency'].sub(min_cpu_frequency).div(cpu_frequency_range)
