# This code covers various data analysis techniques using the Pandas library, including reading and inspecting data, descriptive statistics, data aggregation, data filtering, data sorting, data merging, data visualization, data cleaning, handling missing values, feature engineering, feature selection, and machine learning.

In [None]:
import pandas as pd

# Reading and inspecting data

In [None]:
# Load the primary dataset from 'data.csv' into the DataFrame `df`
df = pd.read_csv(filepath_or_buffer='data.csv')

In [None]:
# Preview the top of the DataFrame (head() returns the first five rows by default)
print("First few rows:", df.head(), sep="\n")

In [None]:
# Display basic information about the DataFrame.
# DataFrame.info() writes its summary directly to stdout and returns None,
# so it is called on its own; wrapping it in print() would emit a stray "None".
print("\nData information:")
df.info()

# Descriptive statistics


In [None]:
# Summary statistics as produced by DataFrame.describe()
print("\nDescriptive statistics:", df.describe(), sep="\n")

# Data aggregation

In [None]:
# Average 'Salary' within each 'Country' group
grouped_data = df.groupby(by='Country')['Salary'].mean()
print("\nGrouped data:", grouped_data, sep="\n")

# Data filtering

In [None]:
# Keep only the rows whose 'Salary' is strictly greater than 50000
filtered_data = df.loc[df['Salary'].gt(50000)]
print("\nFiltered data:", filtered_data, sep="\n")

# Data sorting

In [None]:
# Order the rows by 'Salary', highest first (returns a new DataFrame)
sorted_data = df.sort_values(by='Salary', ascending=False)
print("\nSorted data:", sorted_data, sep="\n")

# Data merging

In [None]:
# Load the second dataset from 'data2.csv' into the DataFrame `df2`
df2 = pd.read_csv(filepath_or_buffer='data2.csv')

In [None]:
# Preview the first five rows of df2 (shown as the cell's output in a notebook)
df2.head(n=5)

In [None]:
# Inner-join df with df2 on their shared 'Country' column
merged_data = df.merge(df2, how='inner', on='Country')
print("\nMerged data:", merged_data, sep="\n")

# Data visualization


In [None]:
import matplotlib.pyplot as plt
# Draw the 'Salary' column as a bar chart, then label the current axes
df['Salary'].plot.bar()
plt.gca().set_title("Bar Plot")
plt.gca().set_xlabel("Categories")
plt.gca().set_ylabel("Values")
plt.show()

In [None]:
# Create a scatter plot of two columns: 'Salary' on the x-axis, 'Age' on the y-axis
df.plot(x='Salary', y='Age', kind='scatter')
plt.title("Scatter Plot")
# Axis labels name the columns actually plotted; generic "Value"/"Count"
# labels would misdescribe the data ('Age' is not a count).
plt.xlabel("Salary")
plt.ylabel("Age")
plt.show()

# Data cleaning

In [None]:
# Drop fully duplicated rows, keeping the first occurrence of each
df_cleaned = df.drop_duplicates(keep='first')
print("\nCleaned data:", df_cleaned, sep="\n")

# Handling missing values

In [None]:
# Count the missing entries in each column
print("\nMissing values:", df.isna().sum(), sep="\n")

In [None]:
# Replace every missing entry with 0 (returns a new DataFrame; df is unchanged)
df_filled = df.fillna(value=0)
print("\nFilled data:", df_filled, sep="\n")

# Feature engineering

In [None]:
# Add 'NewColumn' to df in place as the element-wise sum of 'Age' and 'Salary'
df['NewColumn'] = df['Age'].add(df['Salary'])
print("\nDataFrame with new column:", df, sep="\n")

# Feature selection

In [None]:
# Take all rows but only the 'Salary' and 'Age' columns, in that order
selected_columns = df.loc[:, ['Salary', 'Age']]
print("\nSelected columns:", selected_columns, sep="\n")