### Task 1: Basic Data Profiling of a CSV File
**Description**: Load a CSV file and generate a Pandas-Profiling report.

**Steps**:
1. Load a CSV File: Make sure you have a CSV file (e.g., `data.csv`). Load it using pandas.
2. Generate a Profile Report.

In [1]:
import pandas as pd
import pandas_profiling
import streamlit as st

def generate_data_profile(file_path):
    """
    Load a CSV file, build a Pandas-Profiling report, save it and display it.

    The report is saved as ``<input name without extension>_profile.html``
    and is also embedded in the Streamlit page. Nothing is returned; any
    failure is shown with ``st.error`` instead of being raised.

    NOTE(review): requires the ``pandas_profiling`` package to be installed;
    recent releases are reportedly published as ``ydata-profiling`` -- confirm
    which one the target environment provides.

    Args:
        file_path (str): The path to the CSV file.
    """
    import os  # local import: only needed to derive the report file name

    try:
        # Load the CSV file into a Pandas DataFrame
        df = pd.read_csv(file_path)
        st.write(f"Loaded data from: {file_path}")  # show file loaded.

        # Generate the Pandas-Profiling report
        profile = pandas_profiling.ProfileReport(df, title="Data Profile Report", explorative=True)

        # Derive the report name from the final extension only. A plain
        # str.replace(".csv", ...) would also rewrite ".csv" inside directory
        # names and would leave names such as "data.CSV" or "data.txt"
        # unchanged, i.e. identical to the input path.
        base_name, _ = os.path.splitext(file_path)
        profile_file_name = f"{base_name}_profile.html"
        profile.to_file(profile_file_name)
        st.success(f"Pandas-Profiling report generated and saved to: {profile_file_name}")

        # Display the report in Streamlit (optional, can be slow for large datasets)
        st.header("Data Profile Report")
        st.components.v1.html(profile.to_html(), width=800, height=600, scrolling=True)

    except FileNotFoundError:
        st.error(f"Error: File not found at {file_path}")
    except Exception as e:
        # UI boundary: surface any other failure (parse errors, report
        # generation, write errors) to the user instead of crashing the app.
        st.error(f"An error occurred: {e}")

if __name__ == "__main__":
    # Ask for the CSV location through a Streamlit text box that is
    # pre-filled with a default name (replace it with your own CSV file).
    default_file_path = "data.csv"
    file_path = st.text_input(
        label="Enter the path to your CSV file:",
        value=default_file_path,
    )

    # Build, save and render the profile for the chosen file.
    generate_data_profile(file_path)


ModuleNotFoundError: No module named 'pandas_profiling'

### Task 2: Understanding Missing Values with Pandas-Profiling

**Description**: Identify missing values in your dataset using pandas-profiling.

**Steps**: 
1. Generate a Profile Report to Analyze Missing Values


In [2]:
import pandas as pd
import pandas_profiling
import streamlit as st

def generate_data_profile(file_path):
    """
    Load a CSV file and build a Pandas-Profiling report, with emphasis on
    missing value analysis.

    The report is saved as ``<input name without extension>_profile.html``
    and is also embedded in the Streamlit page together with a short hint on
    where to find the missing-value information. Nothing is returned; any
    failure is shown with ``st.error`` instead of being raised.

    NOTE(review): requires the ``pandas_profiling`` package to be installed;
    recent releases are reportedly published as ``ydata-profiling`` -- confirm
    which one the target environment provides.

    Args:
        file_path (str): The path to the CSV file.
    """
    import os  # local import: only needed to derive the report file name

    try:
        # Load the CSV file into a Pandas DataFrame
        df = pd.read_csv(file_path)
        st.write(f"Loaded data from: {file_path}")

        # Generate the Pandas-Profiling report
        profile = pandas_profiling.ProfileReport(df, title="Data Profile Report", explorative=True)

        # Derive the report name from the final extension only. A plain
        # str.replace(".csv", ...) would also rewrite ".csv" inside directory
        # names and would leave names such as "data.CSV" or "data.txt"
        # unchanged, i.e. identical to the input path.
        base_name, _ = os.path.splitext(file_path)
        profile_file_name = f"{base_name}_profile.html"
        profile.to_file(profile_file_name)
        st.success(f"Pandas-Profiling report generated and saved to: {profile_file_name}")

        # Display the report in Streamlit
        st.header("Data Profile Report")
        st.write(
            "The Pandas-Profiling report provides a detailed analysis of the data, including information about missing values.  "
            "Scroll through the report to find sections on 'Missing Values' for each variable, and an overview in the 'Overview' section."
        )
        st.components.v1.html(profile.to_html(), width=800, height=600, scrolling=True)

    except FileNotFoundError:
        st.error(f"Error: File not found at {file_path}")
    except Exception as e:
        # UI boundary: surface any other failure (parse errors, report
        # generation, write errors) to the user instead of crashing the app.
        st.error(f"An error occurred: {e}")

if __name__ == "__main__":
    # Ask for the CSV location through a Streamlit text box that is
    # pre-filled with a default name (replace it with your own CSV file).
    default_file_path = "data.csv"
    file_path = st.text_input(
        label="Enter the path to your CSV file:",
        value=default_file_path,
    )

    # Build, save and render the profile for the chosen file.
    generate_data_profile(file_path)


ModuleNotFoundError: No module named 'pandas_profiling'

### Task 3: Analyze Data Types Using Pandas-Profiling
**Description**: Use Pandas-Profiling to analyze and check data types of your dataset.

In [3]:
import pandas as pd
import pandas_profiling
import streamlit as st

def generate_data_profile(file_path):
    """
    Load a CSV file and build a Pandas-Profiling report, with emphasis on
    data type analysis.

    The report is saved as ``<input name without extension>_profile.html``
    and is also embedded in the Streamlit page together with a short hint on
    where to find the data type information. Nothing is returned; any
    failure is shown with ``st.error`` instead of being raised.

    NOTE(review): requires the ``pandas_profiling`` package to be installed;
    recent releases are reportedly published as ``ydata-profiling`` -- confirm
    which one the target environment provides.

    Args:
        file_path (str): The path to the CSV file.
    """
    import os  # local import: only needed to derive the report file name

    try:
        # Load the CSV file into a Pandas DataFrame
        df = pd.read_csv(file_path)
        st.write(f"Loaded data from: {file_path}")

        # Generate the Pandas-Profiling report
        profile = pandas_profiling.ProfileReport(df, title="Data Profile Report", explorative=True)

        # Derive the report name from the final extension only. A plain
        # str.replace(".csv", ...) would also rewrite ".csv" inside directory
        # names and would leave names such as "data.CSV" or "data.txt"
        # unchanged, i.e. identical to the input path.
        base_name, _ = os.path.splitext(file_path)
        profile_file_name = f"{base_name}_profile.html"
        profile.to_file(profile_file_name)
        st.success(f"Pandas-Profiling report generated and saved to: {profile_file_name}")

        # Display the report in Streamlit
        st.header("Data Profile Report")
        st.write(
            "The Pandas-Profiling report provides a detailed analysis of the data, including data types.  "
            "The report includes a 'Variable Type' for each column in your dataset.  "
            "Review the report, especially the column details, to understand the data types."
        )
        st.components.v1.html(profile.to_html(), width=800, height=600, scrolling=True)

    except FileNotFoundError:
        st.error(f"Error: File not found at {file_path}")
    except Exception as e:
        # UI boundary: surface any other failure (parse errors, report
        # generation, write errors) to the user instead of crashing the app.
        st.error(f"An error occurred: {e}")

if __name__ == "__main__":
    # Ask for the CSV location through a Streamlit text box that is
    # pre-filled with a default name (replace it with your own CSV file).
    default_file_path = "data.csv"
    file_path = st.text_input(
        label="Enter the path to your CSV file:",
        value=default_file_path,
    )

    # Build, save and render the profile for the chosen file.
    generate_data_profile(file_path)


ModuleNotFoundError: No module named 'pandas_profiling'

### Task 4: Detect Unique Values and Duplicates
**Description**: Use Pandas-Profiling to detect unique values and duplicates in your dataset.

In [4]:
import pandas as pd
import pandas_profiling
import streamlit as st

def generate_data_profile(file_path):
    """
    Load a CSV file and build a Pandas-Profiling report, with emphasis on
    unique value and duplicate detection.

    The report is saved as ``<input name without extension>_profile.html``
    and is also embedded in the Streamlit page together with a short hint on
    where to find the uniqueness/duplicate information. Nothing is returned;
    any failure is shown with ``st.error`` instead of being raised.

    NOTE(review): requires the ``pandas_profiling`` package to be installed;
    recent releases are reportedly published as ``ydata-profiling`` -- confirm
    which one the target environment provides.

    Args:
        file_path (str): The path to the CSV file.
    """
    import os  # local import: only needed to derive the report file name

    try:
        # Load the CSV file into a Pandas DataFrame
        df = pd.read_csv(file_path)
        st.write(f"Loaded data from: {file_path}")

        # Generate the Pandas-Profiling report
        profile = pandas_profiling.ProfileReport(df, title="Data Profile Report", explorative=True)

        # Derive the report name from the final extension only. A plain
        # str.replace(".csv", ...) would also rewrite ".csv" inside directory
        # names and would leave names such as "data.CSV" or "data.txt"
        # unchanged, i.e. identical to the input path.
        base_name, _ = os.path.splitext(file_path)
        profile_file_name = f"{base_name}_profile.html"
        profile.to_file(profile_file_name)
        st.success(f"Pandas-Profiling report generated and saved to: {profile_file_name}")

        # Display the report in Streamlit
        st.header("Data Profile Report")
        st.write(
            "The Pandas-Profiling report provides a detailed analysis of the data, including unique values and duplicates.  "
            "For each column, the report shows the number of unique values and the percentage of duplicate values.  "
            "Review the column details in the report to find this information."
        )
        st.components.v1.html(profile.to_html(), width=800, height=600, scrolling=True)

    except FileNotFoundError:
        st.error(f"Error: File not found at {file_path}")
    except Exception as e:
        # UI boundary: surface any other failure (parse errors, report
        # generation, write errors) to the user instead of crashing the app.
        st.error(f"An error occurred: {e}")

if __name__ == "__main__":
    # Ask for the CSV location through a Streamlit text box that is
    # pre-filled with a default name (replace it with your own CSV file).
    default_file_path = "data.csv"
    file_path = st.text_input(
        label="Enter the path to your CSV file:",
        value=default_file_path,
    )

    # Build, save and render the profile for the chosen file.
    generate_data_profile(file_path)


ModuleNotFoundError: No module named 'pandas_profiling'