# Import Required Libraries

In this section, we import the essential libraries for data analysis and visualization: pandas and matplotlib.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Optional: display plots inline if using Jupyter
%matplotlib inline

# Load the Dataset

Let's load a dataset into a pandas DataFrame. Replace `'your_dataset.csv'` with the path to your actual CSV file.

In [None]:
# Read the CSV file into a pandas DataFrame
df = pd.read_csv('your_dataset.csv')

# Report the size of what was read; df.shape is a (rows, columns) pair
row_count, column_count = df.shape
print(f"Dataset loaded with {row_count} rows and {column_count} columns.")

# Explore the Dataset

We'll take a look at the first few rows, check the data types, and get a summary of the dataset.

In [None]:
# Preview: head() returns the first 5 rows by default
print("First 5 rows of the dataset:")
first_rows = df.head()
display(first_rows)

# Structure: info() prints its own report (column dtypes, non-null counts)
print("\nDataset info:")
df.info()

# Distribution: describe() summarizes the numerical columns by default
print("\nSummary statistics:")
summary_stats = df.describe()
display(summary_stats)

# Basic Data Analysis

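Let's perform some basic analysis, such as calculating means, medians, value counts, and correlations. The value-counts step runs only if the named column exists in the DataFrame, so replace the placeholder column name with a column from your dataset.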

In [None]:
# Column used for the value-counts step. Set it once here rather than
# editing every occurrence below (replace 'column_name' as needed).
value_counts_column = 'column_name'

# Mean and median of every numerical column; numeric_only=True leaves
# non-numeric columns out of the calculation.
means = df.mean(numeric_only=True)
medians = df.median(numeric_only=True)
print("Means:\n", means)
print("\nMedians:\n", medians)

# Frequency of each distinct value in the chosen column. The step is
# skipped, with a message, when the column is not in the DataFrame, so
# the rest of the cell still runs.
if value_counts_column in df.columns:
    print(f"\nValue counts for '{value_counts_column}':")
    print(df[value_counts_column].value_counts())
else:
    print(f"\nColumn '{value_counts_column}' not found; skipping value counts.")

# Pairwise correlation between the numerical columns
print("\nCorrelation matrix:")
display(df.corr(numeric_only=True))

# Create Visualizations

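We will use matplotlib to create simple plots and charts to visualize the data. Each plot is drawn only if the columns it refers to exist in the DataFrame, so replace the placeholder column names with columns from your dataset.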

In [None]:
# Columns to plot. Set each name once here rather than editing every
# occurrence below (replace the placeholder names as needed); the guards,
# data access, titles and axis labels all read from these variables, so
# they cannot drift apart.
hist_column = 'numeric_column'
bar_column = 'categorical_column'
scatter_x_column = 'numeric_column1'
scatter_y_column = 'numeric_column2'

# Histogram of a numerical column. Skipped, with a message, when the
# column is not in the DataFrame.
if hist_column in df.columns:
    plt.figure(figsize=(8, 5))
    df[hist_column].hist(bins=20)
    plt.title(f'Histogram of {hist_column}')
    plt.xlabel(hist_column)
    plt.ylabel('Frequency')
    plt.show()
else:
    print(f"Column '{hist_column}' not found; skipping histogram.")

# Bar chart of how often each value of a categorical column occurs.
if bar_column in df.columns:
    plt.figure(figsize=(8, 5))
    df[bar_column].value_counts().plot(kind='bar')
    plt.title(f'Bar Chart of {bar_column}')
    plt.xlabel(bar_column)
    plt.ylabel('Count')
    plt.show()
else:
    print(f"Column '{bar_column}' not found; skipping bar chart.")

# Scatter plot of one numerical column against another. Both columns
# must be present for the plot to be drawn.
if {scatter_x_column, scatter_y_column}.issubset(df.columns):
    plt.figure(figsize=(8, 5))
    plt.scatter(df[scatter_x_column], df[scatter_y_column])
    plt.title(f'Scatter Plot between {scatter_x_column} and {scatter_y_column}')
    plt.xlabel(scatter_x_column)
    plt.ylabel(scatter_y_column)
    plt.show()
else:
    print(
        f"Columns '{scatter_x_column}' and/or '{scatter_y_column}' not found; "
        "skipping scatter plot."
    )

# Document Findings or Observations

Summarize any key findings or observations based on the analysis and visualizations above. For example, you might note trends, outliers, or interesting relationships between variables.