# Best Practices for Efficient & Clean Pandas Code

## Importing Libraries:

In [1]:
import pandas as pd
from faker import Faker


## Generating Sample Data with Faker:

In [2]:
fake = Faker()

# Create sample data for demonstration
data = {
    'Name': [fake.name() for _ in range(1000)],
    'Email': [fake.email() for _ in range(1000)],
    'Age': [fake.random_int(18, 80) for _ in range(1000)],
    'Salary': [fake.random_int(30000, 150000) for _ in range(1000)],
    'City': [fake.city() for _ in range(1000)]
}

df = pd.DataFrame(data)


## Best Practices for Efficient Pandas Code:
### Vectorization:
Avoid looping over DataFrame rows, instead use vectorized operations for better performance.

In [3]:
# Bad Practice
for index, row in df.iterrows():
    df.at[index, 'Salary'] *= 1.1

# Better Practice
df['Salary'] *= 1.1


  df.at[index, 'Salary'] *= 1.1


### Method Chaining:
Use method chaining to perform operations on DataFrame in a concise and readable manner.

In [4]:
# Bad Practice
df = df.dropna()
df = df[df['Age'] > 30]

# Better Practice
df = df.dropna().loc[df['Age'] > 30]


## est Practices for Clean Pandas Code:
### Descriptive Variable Names:
Use descriptive variable names to enhance code readability.

In [5]:
# Bad Practice
df2 = df[(df['Age'] > 30) & (df['Salary'] > 80000)]

# Better Practice
high_earners = df[(df['Age'] > 30) & (df['Salary'] > 80000)]


### Consistent Formatting:
Maintain consistent formatting for improved code readability.

In [6]:
# Bad Practice
df['Email'] = df['Email'].apply(lambda x: x.lower())

# Better Practice
df['Email'] = df['Email'].apply(str.lower)
