In [3]:
import pandas as pd

# Load the housing dataset. read_csv already returns a DataFrame, so it is
# used directly instead of being re-wrapped in pd.DataFrame(...).
housing_data = pd.read_csv('Housing.csv')

# Preview the raw table; at this point price is still an ordinary column.
print("First five rows:")
print(housing_data.head())
print("\nLast five rows:")
print(housing_data.tail())

# Build the working frame indexed by price. set_index returns a new
# DataFrame, so housing_data keeps its original layout, while df no longer
# has 'price' among its columns.
# NOTE: price values repeat in this data, so a label lookup such as
# df.loc[<price>] can return several rows rather than a single one.
df = housing_data.set_index('price')

# Single column by name -> Series.
area_values = df.loc[:, 'area']
print("\nArea values:", area_values, sep="\n")

# Several columns at once -> DataFrame restricted to those columns.
size_columns = ['area', 'bedrooms', 'bathrooms']
selected_columns = df.loc[:, size_columns]
print("\nSelected columns:", selected_columns, sep="\n")

# Label-based lookup: every row whose index label equals 12250000.
subset_loc = df.loc[12250000]
print("\nSubset using .loc:", subset_loc, sep="\n")

# Position-based lookup: rows at positions 2..4 and the first three columns.
subset_iloc = df.iloc[2:5, :3]
print("\nSubset using .iloc:", subset_iloc, sep="\n")

# Boolean-mask filter: keep only the rows with exactly four bedrooms.
has_four_bedrooms = df['bedrooms'] == 4
filtered_rows = df.loc[has_four_bedrooms]
print("\nFiltered rows where bedrooms == 4:", filtered_rows, sep="\n")

# Grouping keyed on 'stories', built once and shared by the single-key
# aggregations below.
by_stories = df.groupby('stories')

# Per-group mean of the numeric columns.
grouped_mean = by_stories.mean(numeric_only=True)
print("\nGrouped by stories and mean calculated:", grouped_mean, sep="\n")

# Two grouping keys: per-(stories, furnishingstatus) sum of the numeric columns.
story_and_furnishing = ['stories', 'furnishingstatus']
grouped_sum = df.groupby(story_and_furnishing).sum(numeric_only=True)
print(
    "\nGrouped by stories and furnishingstatus and sum calculated:",
    grouped_sum,
    sep="\n",
)

# Different aggregations per column: mean and sum of area, count of bedrooms.
aggregations = {'area': ['mean', 'sum'], 'bedrooms': 'count'}
grouped_agg = by_stories.agg(aggregations)
print("\nAggregated grouped data:", grouped_agg, sep="\n")

# Number of rows that fall into each 'stories' group.
group_size = by_stories.size()
print("\nSize of each group:", group_size, sep="\n")

# Combine two element-wise comparisons into one boolean mask.
four_bed_two_bath = df['bedrooms'].eq(4) & df['bathrooms'].eq(2)
multiple_conditions = df.loc[four_bed_two_bath]
print(
    "\nRows based on multiple conditions (bedrooms == 4 and bathrooms == 2):",
    multiple_conditions,
    sep="\n",
)

# The same filter expressed as a query string.
query_filtered = df.query('bedrooms == 4 and bathrooms == 2')
print("\nQuery method filtered rows:", query_filtered, sep="\n")

# Membership filter: keep rows whose furnishingstatus is one of the listed values.
wanted_statuses = ['furnished', 'unfurnished']
status_matches = df['furnishingstatus'].isin(wanted_statuses)
isin_filtered = df.loc[status_matches]
print(
    "\nRows where furnishingstatus is in ['furnished', 'unfurnished']:",
    isin_filtered,
    sep="\n",
)

# Take three columns and give them capitalised display names; the mapping's
# keys double as the list of columns to select.
display_names = {'area': 'Area', 'bedrooms': 'Bedrooms', 'bathrooms': 'Bathrooms'}
renamed_columns = df[list(display_names)].rename(columns=display_names)
print("\nRenamed columns:", renamed_columns, sep="\n")


First five rows:
      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no   
1  12250000  8960         4          4        4      yes        no       no   
2  12250000  9960         3          2        2      yes        no      yes   
3  12215000  7500         4          2        2      yes        no      yes   
4  11410000  7420         4          1        2      yes       yes      yes   

  hotwaterheating airconditioning  parking prefarea furnishingstatus  
0              no             yes        2      yes        furnished  
1              no             yes        3       no        furnished  
2              no              no        2      yes   semi-furnished  
3              no             yes        3      yes        furnished  
4              no             yes        2       no        furnished  

Last five rows:
       price  area  bedrooms  bathrooms  stories mainroad guestro