In [2]:
import pandas as pd

Task 1: Load a DataFrame from a CSV file

In [24]:
df = pd.read_csv('homeprices_data.csv')

Task 2: Display the first and last five rows of the DataFrame

In [25]:
print("\nFirst five rows of DataFrame:\n", df.head())
print("\nLast five rows of DataFrame:\n", df.tail())


First five rows of DataFrame:
    area  bedrooms   price
0  1056         2   39.07
1  2600         4  120.00
2  1440         3   62.00
3  1521         3   75.00
4  1200         2   51.00

Last five rows of DataFrame:
     area  bedrooms  price
15  1175         2   42.0
16  1180         3   48.0
17  1540         3   60.0
18  2770         3  102.0
19   800         1   32.0


Task 3: Set a specific column as the index

In [8]:
df.set_index('area', inplace=True)
print("\nDataFrame with 'Area' set as the index:\n", df.head())


DataFrame with 'Area' set as the index:
       bedrooms   price
area                  
1056         2   39.07
2600         4  120.00
1440         3   62.00
1521         3   75.00
1200         2   51.00


Task 4: Select a specific column ('Prices') and display its values

In [9]:
print("\n'Prices' column values:\n", df['price'])


'Prices' column values:
 area
1056     39.07
2600    120.00
1440     62.00
1521     75.00
1200     51.00
1170     38.00
2732    135.00
3300    155.00
1310     50.00
3700    167.00
1800     82.00
2785    140.00
1000     38.00
1100     40.00
2250    101.00
1175     42.00
1180     48.00
1540     60.00
2770    102.00
800      32.00
Name: price, dtype: float64


Task 5: Select multiple columns ('Bedrooms' and 'Prices') and display the resulting DataFrame

In [10]:
df_selected_columns = df[['bedrooms', 'price']]
print("\nSelected columns (Bedrooms, Prices):\n", df_selected_columns)


Selected columns (Bedrooms, Prices):
       bedrooms   price
area                  
1056         2   39.07
2600         4  120.00
1440         3   62.00
1521         3   75.00
1200         2   51.00
1170         2   38.00
2732         4  135.00
3300         4  155.00
1310         3   50.00
3700         5  167.00
1800         3   82.00
2785         4  140.00
1000         2   38.00
1100         2   40.00
2250         3  101.00
1175         2   42.00
1180         3   48.00
1540         3   60.00
2770         3  102.00
800          1   32.00


Task 6: Select a subset of rows using the .loc method (e.g., rows where 'Area' is between 2000 and 3500)

In [11]:
df_loc_subset = df.loc[1200:2785]
print("\nSubset of rows using .loc method:\n", df_loc_subset)


Subset of rows using .loc method:
       bedrooms  price
area                 
1200         2   51.0
1170         2   38.0
2732         4  135.0
3300         4  155.0
1310         3   50.0
3700         5  167.0
1800         3   82.0
2785         4  140.0


Task 7: Select a subset of rows and columns using the .iloc method (e.g., first 3 rows, first 2 columns)

In [13]:
df_iloc_subset = df.iloc[:3, :1]
print("\nSubset of rows and columns using .iloc method:\n", df_iloc_subset)


Subset of rows and columns using .iloc method:
       bedrooms
area          
1056         2
2600         4
1440         3


Task 8: Filter rows based on a condition (e.g., where 'Bedrooms' > 3)

In [15]:
df_filtered = df[df['bedrooms'] > 3]
print("\nFiltered rows where Bedrooms > 3:\n", df_filtered)


Filtered rows where Bedrooms > 3:
       bedrooms  price
area                 
2600         4  120.0
2732         4  135.0
3300         4  155.0
3700         5  167.0
2785         4  140.0


Task 9: Group the DataFrame by a specific column ('Bedrooms') and calculate the mean of each group

In [17]:
df_grouped_mean = df.groupby('bedrooms')['price'].mean()
print("\nMean prices by number of bedrooms:\n", df_grouped_mean)


Mean prices by number of bedrooms:
 bedrooms
1     32.000
2     41.345
3     72.500
4    137.500
5    167.000
Name: price, dtype: float64


Task 10: Group the DataFrame by multiple columns ('Bedrooms', 'Area') and calculate the sum of each group

In [18]:
df_grouped_sum = df.groupby(['bedrooms', 'area'])['price'].sum()
print("\nSum of prices grouped by Bedrooms and Area:\n", df_grouped_sum)


Sum of prices grouped by Bedrooms and Area:
 bedrooms  area
1         800      32.00
2         1000     38.00
          1056     39.07
          1100     40.00
          1170     38.00
          1175     42.00
          1200     51.00
3         1180     48.00
          1310     50.00
          1440     62.00
          1521     75.00
          1540     60.00
          1800     82.00
          2250    101.00
          2770    102.00
4         2600    120.00
          2732    135.00
          2785    140.00
          3300    155.00
5         3700    167.00
Name: price, dtype: float64


Task 11: Use the agg method to apply multiple aggregation functions (e.g., 'mean', 'sum', 'max') on 'Prices'

In [26]:
df_agg = df.groupby('bedrooms').agg({'price': ['mean', 'sum', 'max'], 'area': ['min', 'max']})
print("\nAggregated statistics:\n", df_agg)


Aggregated statistics:
             price                 area      
             mean     sum    max   min   max
bedrooms                                    
1          32.000   32.00   32.0   800   800
2          41.345  248.07   51.0  1000  1200
3          72.500  580.00  102.0  1180  2770
4         137.500  550.00  155.0  2600  3300
5         167.000  167.00  167.0  3700  3700


Task 12: Calculate the size of each group based on 'Bedrooms'

In [27]:
df_group_size = df.groupby('bedrooms').size()
print("\nNumber of properties in each bedroom group:\n", df_group_size)


Number of properties in each bedroom group:
 bedrooms
1    1
2    6
3    8
4    4
5    1
dtype: int64


Task 13: Select rows based on multiple conditions (e.g., 'Bedrooms' > 3 and 'Prices' > 500000)

In [36]:
df_multi_condition = df[(df['bedrooms'] > 2) & (df['price'] > 100)]
print("\nRows where Bedrooms > 3 and Prices > 100,000:\n", df_multi_condition)


Rows where Bedrooms > 3 and Prices > 100,000:
     area  bedrooms  price
1   2600         4  120.0
6   2732         4  135.0
7   3300         4  155.0
9   3700         5  167.0
11  2785         4  140.0
14  2250         3  101.0
18  2770         3  102.0


Task 14: Use the query method to filter rows where 'Prices' > 400000

In [38]:
df_query_result = df.query("price > 90")
print("\nRows filtered using query method (Prices > 90,000):\n", df_query_result)


Rows filtered using query method (Prices > 90,000):
     area  bedrooms  price
1   2600         4  120.0
6   2732         4  135.0
7   3300         4  155.0
9   3700         5  167.0
11  2785         4  140.0
14  2250         3  101.0
18  2770         3  102.0


Task 15: Use isin to filter rows based on a list of values (e.g., Bedrooms is either 2 or 4)

In [42]:
df_isin_filtered = df[df['bedrooms'].isin([2, 1,4])]
print("\nRows where Bedrooms is 2 or 4:\n", df_isin_filtered)


Rows where Bedrooms is 2 or 4:
     area  bedrooms   price
0   1056         2   39.07
1   2600         4  120.00
4   1200         2   51.00
5   1170         2   38.00
6   2732         4  135.00
7   3300         4  155.00
11  2785         4  140.00
12  1000         2   38.00
13  1100         2   40.00
15  1175         2   42.00
19   800         1   32.00


Task 16: Select specific columns ('Bedrooms', 'Prices') and rename them

In [43]:
df_renamed = df[['bedrooms', 'price']].rename(columns={'bedrooms': 'Total Bedrooms', 'price': 'House Price'})
print("\nDataFrame with selected and renamed columns:\n", df_renamed)


DataFrame with selected and renamed columns:
     Total Bedrooms  House Price
0                2        39.07
1                4       120.00
2                3        62.00
3                3        75.00
4                2        51.00
5                2        38.00
6                4       135.00
7                4       155.00
8                3        50.00
9                5       167.00
10               3        82.00
11               4       140.00
12               2        38.00
13               2        40.00
14               3       101.00
15               2        42.00
16               3        48.00
17               3        60.00
18               3       102.00
19               1        32.00
