In [1]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv('sales_data.csv')

# **Exercise 1: Loading Data**
# 1. Get a summary of the DataFrame
print(df.info())
# 2. Get a summary of the numeric columns
print(df.describe())

# **Exercise 2: Accessing Data**
# 1. Access the first 5 rows
print(df.head())
# 2. Access a specific column (Product or Price)
print(df['Product'].unique())  # or df['Price'].unique()
# 3. Access rows where Sales > 500
print(df[df['Sales'] > 500])

# **Exercise 3: Filtering Data**
# 1. Filter data for Region = East
print(df[df['Region'] == 'East'])
# 2. Filter data where Price > 100 and Sales < 1000
print(df[(df['Price'] > 100) & (df['Sales'] < 1000)])

# **Exercise 4: Handling Missing Values**
# 1. Check for missing values
print(df.isnull().sum())
# 2. Fill missing values in the Sales column with the median
df['Sales'].fillna(df['Sales'].median(), inplace=True)
# 3. Drop rows where Product is missing
df.dropna(subset=['Product'], inplace=True)

# **Exercise 5: Adding New Column**
# 1. Add a new column Discounted_Price (10% off)
df['Discounted_Price'] = df['Price'] * 0.9
# 2. Display the first 5 rows to confirm
print(df.head())

# **Exercise 6: Dropping Columns**
# 1. Drop the Discounted_Price column
df.drop(columns=['Discounted_Price'], inplace=True)
# 2. Confirm the column has been dropped
print(df.columns)

# **Exercise 7: Combining NumPy Functions**
# 1. Create a new column Profit (Sales - Cost)
df['Profit'] = df['Sales'] - df['Cost']
# 2. Create a new column Log_Profit using np.log()
df['Log_Profit'] = np.log(df['Profit'])
# Display the first 5 rows to confirm
print(df.head())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Product  26 non-null     object
 1   Price    26 non-null     int64 
 2   Sales    26 non-null     int64 
 3   Cost     26 non-null     int64 
 4   Region   26 non-null     object
dtypes: int64(3), object(2)
memory usage: 1.1+ KB
None
             Price       Sales        Cost
count    26.000000   26.000000   26.000000
mean    414.038462  342.307692  248.269231
std     315.594738  250.683987  206.692730
min      50.000000   80.000000   30.000000
25%     162.500000  142.500000   80.000000
50%     340.000000  215.000000  140.000000
75%     615.000000  537.500000  415.000000
max    1100.000000  900.000000  700.000000
      Product  Price  Sales  Cost Region
0      Laptop   1000    800   600  North
1  Smartphone    600    500   400  South
2      Tablet    400    300   250   East
3     Monitor    300    150   100

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Sales'].fillna(df['Sales'].median(), inplace=True)
