### Filtering on Columns

In [20]:
import pandas as pd

In [21]:
# Load the laptop price dataset into a DataFrame (a 2-D table of rows and columns).
# - The r"..." raw-string prefix keeps the backslashes of the Windows path literal,
#   so they are not interpreted as escape sequences.
# - encoding='ISO-8859-1' makes pandas decode the file as Latin-1 instead of the
#   default UTF-8; presumably the CSV contains bytes that are not valid UTF-8 -- TODO confirm.
# NOTE(review): this is an absolute path on one specific machine, so the cell fails
# anywhere else; consider a path relative to the notebook or a configurable data directory.


df = pd.read_csv(r"C:\Users\hunte\Documents\Analyst Workshop\Pandas For Data Analysis\3. Filtering and Ordering\laptop_prices.csv",
                encoding = 'ISO-8859-1')
# A bare expression on the last line of a cell is rendered as the cell's output.
df

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_usd
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1473.659
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,988.834
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,632.500
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2791.195
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1983.960
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,701.800
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16GB,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1648.900
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2GB,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,251.900
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6GB,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,840.400


In [22]:
# Cap how much of a DataFrame pandas renders; anything beyond the cap is
# truncated and shown as "..." in the output.
# display.max_rows:    maximum number of rows shown.
# display.max_columns: maximum number of columns shown.
#
# Use the canonical option names (with an underscore). The dotted spellings
# 'display.max.rows' / 'display.max.columns' only work because pandas treats
# the option name as a regular expression, in which '.' happens to match '_'.


pd.set_option('display.max_rows', 15)
pd.set_option('display.max_columns', 15)

In [23]:
# Re-display the DataFrame, now rendered under the display limits set in the previous cell.
df

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_usd
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1473.659
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,988.834
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,632.500
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2791.195
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1983.960
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,701.800
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16GB,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1648.900
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2GB,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,251.900
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6GB,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,840.400


In [24]:
# Selecting a single column: df['Company']

# Explanation:
# Indexing a DataFrame with one column label returns that column as a Series
# (a one-dimensional labelled array). The row index of the DataFrame is kept.

df['Company']

0        Apple
1        Apple
2           HP
3        Apple
4        Apple
         ...  
1298    Lenovo
1299    Lenovo
1300    Lenovo
1301        HP
1302      Asus
Name: Company, Length: 1303, dtype: object

In [25]:
# Checking the type: type(df['Company'])
# Explanation:
# Selecting with a single label returns a pandas Series (pandas.core.series.Series),
# the one-dimensional counterpart of a DataFrame.

type(df['Company'])

pandas.core.series.Series

In [26]:
# Extracting Multiple Columns
# Code: df[['Company', 'Product', 'Price_usd']]

# Explanation:
# The outer brackets are the indexing operator; the inner brackets build a Python
# list of column labels. Passing a list returns a new DataFrame that contains
# only the listed columns, in the order given.


df[['Company', 'Product','Price_usd']]

Unnamed: 0,Company,Product,Price_usd
0,Apple,MacBook Pro,1473.659
1,Apple,Macbook Air,988.834
2,HP,250 G6,632.500
3,Apple,MacBook Pro,2791.195
4,Apple,MacBook Pro,1983.960
...,...,...,...
1298,Lenovo,Yoga 500-14ISK,701.800
1299,Lenovo,Yoga 900-13ISK,1648.900
1300,Lenovo,IdeaPad 100S-14IBR,251.900
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,840.400


In [27]:
# Checking the type: type(df[['Company', 'Product', 'Price_usd']])
# Explanation:
# Selecting with a list of labels returns a DataFrame (pandas.core.frame.DataFrame),
# the two-dimensional rows-and-columns structure, even though fewer columns are kept.

type(df[['Company', 'Product','Price_usd']])

pandas.core.frame.DataFrame

In [28]:
# Selecting All Rows and Columns with .loc
# Code: df.loc[:]
# Explanation:
# .loc selects by label, using the form df.loc[row_selector, column_selector].
# Here only the row selector is given: the bare slice ':' means "every row".
# Because no column selector is supplied, all columns are returned as well.

df.loc[:]

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_usd
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1473.659
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,988.834
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,632.500
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2791.195
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1983.960
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Lenovo,Yoga 500-14ISK,2 in 1 Convertible,14.0,IPS Panel Full HD / Touchscreen 1920x1080,Intel Core i7 6500U 2.5GHz,4GB,128GB SSD,Intel HD Graphics 520,Windows 10,1.8kg,701.800
1299,1317,Lenovo,Yoga 900-13ISK,2 in 1 Convertible,13.3,IPS Panel Quad HD+ / Touchscreen 3200x1800,Intel Core i7 6500U 2.5GHz,16GB,512GB SSD,Intel HD Graphics 520,Windows 10,1.3kg,1648.900
1300,1318,Lenovo,IdeaPad 100S-14IBR,Notebook,14.0,1366x768,Intel Celeron Dual Core N3050 1.6GHz,2GB,64GB Flash Storage,Intel HD Graphics,Windows 10,1.5kg,251.900
1301,1319,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,Notebook,15.6,1366x768,Intel Core i7 6500U 2.5GHz,6GB,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19kg,840.400


In [29]:
# Code: df.loc[:, 'Company']
# Explanation:
# The first selector ':' keeps every row; the second selector 'Company' picks one
# column by label. A single column label returns a Series.

df.loc[:, 'Company']

0        Apple
1        Apple
2           HP
3        Apple
4        Apple
         ...  
1298    Lenovo
1299    Lenovo
1300    Lenovo
1301        HP
1302      Asus
Name: Company, Length: 1303, dtype: object

In [30]:
# Code: df.loc[:, ['Company', 'Product', 'Price_usd']]
# Explanation:
# The first selector ':' keeps every row; the second selector is a list of column
# labels, so the result is a DataFrame with the 'Company', 'Product' and 'Price_usd' columns.


df.loc[:,['Company', 'Product','Price_usd']]

Unnamed: 0,Company,Product,Price_usd
0,Apple,MacBook Pro,1473.659
1,Apple,Macbook Air,988.834
2,HP,250 G6,632.500
3,Apple,MacBook Pro,2791.195
4,Apple,MacBook Pro,1983.960
...,...,...,...
1298,Lenovo,Yoga 500-14ISK,701.800
1299,Lenovo,Yoga 900-13ISK,1648.900
1300,Lenovo,IdeaPad 100S-14IBR,251.900
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,840.400


In [31]:
# Selecting Data by Integer Position with .iloc

# Explanation:
# .iloc selects rows and columns by 0-based integer position instead of by label.
# Example 1: df.iloc[:, 1] selects all rows of the column at position 1,
#            i.e. the second column ('Company' in this DataFrame).
# Example 2 (next cell): df.iloc[:, [1, 2, 12]] selects the 2nd, 3rd, and 13th columns.


df.iloc[:,1]

0        Apple
1        Apple
2           HP
3        Apple
4        Apple
         ...  
1298    Lenovo
1299    Lenovo
1300    Lenovo
1301        HP
1302      Asus
Name: Company, Length: 1303, dtype: object

In [32]:
# A list of 0-based positions selects several columns at once: positions 1, 2 and 12
# are 'Company', 'Product' and 'Price_usd' in this DataFrame, returned as a DataFrame.
df.iloc[:,[1,2,12]]

Unnamed: 0,Company,Product,Price_usd
0,Apple,MacBook Pro,1473.659
1,Apple,Macbook Air,988.834
2,HP,250 G6,632.500
3,Apple,MacBook Pro,2791.195
4,Apple,MacBook Pro,1983.960
...,...,...,...
1298,Lenovo,Yoga 500-14ISK,701.800
1299,Lenovo,Yoga 900-13ISK,1648.900
1300,Lenovo,IdeaPad 100S-14IBR,251.900
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,840.400


In [33]:
# Filtering Columns Using filter()

# Code: df.filter(items=['Company', 'Product', 'Price_usd'], axis=1)
# Explanation:
# .filter() keeps the rows or columns whose *labels* are listed in items=.
# It matches on labels only; it does not filter on the values stored in the cells.
# axis=1 means the labels are matched against the column names.
# This returns a DataFrame with only the 'Company', 'Product' and 'Price_usd' columns.


df.filter(items = ['Company', 'Product','Price_usd'], axis = 1)

Unnamed: 0,Company,Product,Price_usd
0,Apple,MacBook Pro,1473.659
1,Apple,Macbook Air,988.834
2,HP,250 G6,632.500
3,Apple,MacBook Pro,2791.195
4,Apple,MacBook Pro,1983.960
...,...,...,...
1298,Lenovo,Yoga 500-14ISK,701.800
1299,Lenovo,Yoga 900-13ISK,1648.900
1300,Lenovo,IdeaPad 100S-14IBR,251.900
1301,HP,15-AC110nv (i7-6500U/6GB/1TB/Radeon,840.400


In [34]:
# What happens here? Why isn't anything showing up?

# Attempting to Filter Rows Instead of Columns
# Code: df.filter(items=['Company', 'Product', 'Price_usd'], axis=0)
# Explanation:
# Issue: axis=0 makes filter() match items= against the row index labels. The row index
# here holds the integers 0..1302, so none of the strings 'Company', 'Product' or
# 'Price_usd' match. filter() does not raise for unmatched labels; it returns a
# DataFrame that still has every column header but zero rows.
# Solution: Use axis=1 to select columns by name. With axis=0, pass row index labels
# instead, e.g. items=[0, 1, 2].

df.filter(items = ['Company', 'Product','Price_usd'], axis = 0)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_usd
