# SQL, Seaborn, HTML


In [None]:
import html
import os
import sqlite3
import warnings

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Suppress every warning for the remainder of the session.
warnings.filterwarnings(action="ignore")

# Use seaborn's "Blues" palette as the default color cycle.
sns.set_palette(palette="Blues")

# Remote location of the raw banking CSV.
file_path = "https://raw.githubusercontent.com/guzmanwolfrank/Data-SQL/BankSQL/BankSQL/data/banking_data.csv"

# Load the CSV into a pandas DataFrame.
df = pd.read_csv(filepath_or_buffer=file_path)

# Open the SQLite database file (sqlite3 creates it when it does not exist).
conn = sqlite3.connect("banking_data.db")

# Store the DataFrame as the `banking_data` table, replacing any existing
# table of that name and leaving the DataFrame index out.
df.to_sql(name="banking_data", con=conn, if_exists="replace", index=False)

def run_query(query, params=None):
    """Run a SQL statement on the module-level connection and return a DataFrame.

    Args:
        query: SQL text to execute against ``conn``.
        params: Optional values bound to the placeholders in ``query``
            (for sqlite3, a sequence for ``?`` placeholders or a mapping for
            named ones). Defaults to ``None``, which executes ``query`` as-is,
            exactly like the previous one-argument form.

    Returns:
        A pandas DataFrame holding the rows produced by ``query``.
    """
    # Binding values through `params` lets the driver do the quoting, so
    # callers never have to build SQL text by string concatenation.
    return pd.read_sql_query(query, conn, params=params)

# Each entry is a (SQL statement, human-readable question) pair.
# The 1-based position in this list is the query number used later on.
queries = [
    (
        "SELECT * FROM banking_data LIMIT 10;",
        "Select first 10 rows",
    ),
    (
        "SELECT COUNT(*) AS Total_Transactions FROM banking_data;",
        "Count the number of rows",
    ),
    (
        "SELECT DISTINCT TransactionType FROM banking_data;",
        "Select distinct transaction types",
    ),
    (
        "SELECT AVG(Amount) AS Avg_Amount FROM banking_data;",
        "Calculate the average transaction amount",
    ),
    (
        "SELECT StateID, COUNT(*) AS Transaction_Count FROM banking_data GROUP BY StateID;",
        "Count number of transactions per state",
    ),
    (
        "SELECT Currency, AVG(Amount) AS Avg_Amount FROM banking_data GROUP BY Currency;",
        "Average transaction amount per currency",
    ),
    (
        "SELECT TransactionDate, Amount FROM banking_data ORDER BY Amount DESC LIMIT 5;",
        "Top 5 transactions by amount",
    ),
]

# Run every query up front and keep the resulting DataFrames in the same
# order as `queries`. The try/finally guarantees the connection is closed
# even when one of the queries raises.
try:
    results = [run_query(sql) for sql, _ in queries]
finally:
    conn.close()

# Apply the "whitegrid" theme and the "Blues" palette to all figures below.
sns.set(style="whitegrid", palette="Blues")

# Folder that receives the rendered plot images.
# exist_ok=True replaces the separate exists-check, which could race with
# another process creating the folder in between.
output_folder = "output_images"
os.makedirs(output_folder, exist_ok=True)

# Render and save one image per query; `i` is the 1-based query number.
for i, (query, question) in enumerate(queries, 1):
    result = results[i - 1]
    if i == 6:
        # Query 6 is drawn as a distribution on a wider canvas. Exactly one
        # figure is opened per iteration so the plt.close() below releases it
        # (opening a second figure here used to leave the first one open).
        plt.figure(figsize=(12, 6))
        # histplot with a KDE overlay and density scaling replaces the
        # deprecated sns.distplot; cut=3 mirrors distplot's KDE extent.
        sns.histplot(
            result['Avg_Amount'],
            kde=True,
            stat="density",
            kde_kws={"cut": 3},
            color='blue',
        )
        plt.title("Distribution of Average Transaction Amount per Currency")
        plt.xlabel("Average Transaction Amount")
        plt.ylabel("Density")
        image_path = os.path.join(output_folder, "output.png")
    else:
        plt.figure(figsize=(10, 6))
        first_column = result.columns[0]
        if len(result.columns) == 1:
            # Single-column result: plot the column against the row index.
            sns.barplot(data=result, x=first_column, y=result.index, palette="Blues")
            plt.xlabel(first_column)
        else:
            # Two or more columns: first column on x, second column on y.
            second_column = result.columns[1]
            sns.barplot(data=result, x=first_column, y=second_column, palette="Blues")
            plt.xlabel(first_column)
            plt.ylabel(second_column)
        plt.title(question)
        image_path = os.path.join(output_folder, f"plot_{i}.jpg")
        if i == 5:  # Rotate x-axis labels for the query 5 plot
            plt.xticks(rotation=90)
    plt.savefig(image_path)
    plt.close()

# Static head of the HTML report: page styles, the project README copy, and
# the opening tag of the flex container that the per-query sections are
# appended to afterwards.
#
# The backslashes in the Windows activation hint are doubled on purpose: in a
# normal (non-raw) string "\a" is the BEL control character and "\S" is an
# invalid escape, so the unescaped path was rendered corrupted.
#
# NOTE(review): "SSIR" in the copy below presumably refers to SQL Server
# Reporting Services, which is commonly abbreviated "SSRS" — confirm before
# changing the user-facing text.
html_content = """
<!DOCTYPE html>
<html>
<head>
    <title>Banking Data Analysis</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            padding: 20px;
        }
        a {
            color: grey;
            text-decoration: none;
        }

        a:hover {
            color: darkgrey;
        }

        a:visited {
            color: grey;
        }

        a:active {
            color: grey;
        }
        h1 {
            color: #333;
        }
        h2, h3, h4 {
            color: #444;
        }
        h5{
            font-size: 24px;
        }
        pre {
            background-color: #f4f4f4;
            padding: 10px;
            border: 1px solid #ddd;
        }
        .container {
            display: flex;
            flex-wrap: wrap;
            justify-content: space-between;
        }
        .column {
            width: 48%;
            margin-bottom: 40px;
        }
        .query {
            margin-bottom: 20px;
        }
        .query-title {
            font-size: 18px;
            font-weight: bold;
            color: #333;
            margin-bottom: 10px;
        }
        .query-result {
            border-collapse: collapse;
            width: 100%;
            font-size: 14px;
        }
        .query-result th, .query-result td {
            border: 1px solid #ddd;
            padding: 8px;
            text-align: left;
        }
        .query-result th {
            background-color: #f2f2f2;
        }
        .query-image {
            margin-top: 20px;
        }
        .query-image img {
            max-width: 100%;
        }
    </style>
</head>
<body>
<h1>Banking Data Analysis</h1>

<h2>Banking and Money Movement Data Project</h2>
<p>This project demonstrates the process of transforming a CSV file into a SQL database using Python and SQLite, and running SQL queries on the database. Additionally, it showcases how to create visualizations with Looker Dashboard and generate reports using SSIS and SSIR.</p>

<h3>Table of Contents</h3>
<ul>
    <li><a href="#project-overview">Project Overview</a></li>
    <li><a href="#features">Features</a></li>
    <li><a href="#setup">Setup</a></li>
    <li><a href="#usage">Usage</a></li>
    <li><a href="#spreadsheet-columns-reference">Spreadsheet Columns Reference</a></li>
    <li><a href="#contributing">Contributing</a></li>
    <li><a href="#license">License</a></li>
</ul>

<h3 id="project-overview">Project Overview</h3>
<p>This project takes a CSV file containing banking and money movement data and converts it into a SQL database using Python and SQLite. SQL queries are then run on the database to extract meaningful insights. Visualizations are created using Looker Dashboard, and reports are generated using SQL Server Integration Services (SSIS) and SQL Server Reporting Services (SSIR).</p>

<h3 id="features">Features</h3>
<ul>
    <li>Convert CSV file to SQL database</li>
    <li>Run SQL queries on the database</li>
    <li>Visualize data with Looker Dashboard</li>
    <li>Generate reports with SSIS and SSIR</li>
</ul>

<h3 id="setup">Setup</h3>

<h4>Prerequisites</h4>
<ul>
    <li>Python 3.x</li>
    <li>SQLite</li>
    <li>Looker</li>
    <li>SSIS</li>
    <li>SSIR</li>
</ul>

<h4>Installation</h4>
<pre><code>
1. Clone the repository:
    git clone https://github.com/yourusername/banking-data-project.git
    cd banking-data-project

2. Create a virtual environment and activate it:
    python -m venv venv
    source venv/bin/activate  # On Windows use `venv\\Scripts\\activate`

3. Install the required Python packages:
    pip install -r requirements.txt
</code></pre>

<h3 id="usage">Usage</h3>

<h4 id="1-csv-to-sql-database">1. CSV to SQL Database</h4>
<p>Run the following script to convert the CSV file into a SQLite database:</p>
<pre><code>
import pandas as pd
import sqlite3

# Load CSV into DataFrame
df = pd.read_csv('data/banking_data.csv')

# Connect to SQLite database (or create it)
conn = sqlite3.connect('data/banking_data.db')

# Convert DataFrame to SQL
df.to_sql('banking_data', conn, if_exists='replace', index=False)

# Close the connection
conn.close()
</code></pre>

<h4 id="2-running-sql-queries">2. Running SQL Queries</h4>
<p>Run SQL queries on the SQLite database using the following script:</p>
<pre><code>
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('data/banking_data.db')
cursor = conn.cursor()

# Sample query
query = "SELECT * FROM banking_data WHERE amount > 1000"
result = cursor.execute(query).fetchall()


# Print the result
for row in result:
    print(row)

# Close the connection
conn.close()
</code></pre>

<h4 id="3-looker-dashboard">3. Looker Dashboard</h4>
<p>To visualize the data, I used Looker. The Looker Dashboard can be found here (link#).</p>

<h4 id="4-ssis-and-ssir-reports">4. SSIS and SSIR Reports</h4>
<p>Generated reports using SSIS and SSIR. You can view SSIS and SSIR here (link#).</p>
<p>For detailed instructions on creating and running reports, refer to the <a href="https://docs.microsoft.com/en-us/sql/integration-services/sql-server-integration-services">SSIS Documentation</a> and <a href="https://docs.microsoft.com/en-us/sql/reporting-services/create-deploy-and-manage-mobile-and-paginated-reports">SSIR Documentation</a>.</p>

<h3 id="spreadsheet-columns-reference">Spreadsheet Columns Reference</h3>
<p>The CSV file contains the following columns related to banking and money movement:</p>
<ul>
    <li><code>TransactionID</code>: Unique identifier for each transaction</li>
    <li><code>AccountID</code>: Unique identifier for each account</li>
    <li><code>TransactionDate</code>: Date of the transaction</li>
    <li><code>Amount</code>: Amount of money moved in the transaction</li>
    <li><code>TransactionType</code>: Type of transaction (e.g., deposit, withdrawal)</li>
    <li><code>Description</code>: Description of the transaction</li>
    <li><code>First Name</code>: First name of the account holder</li>
    <li><code>Last Name</code>: Last name of the account holder</li>
    <li><code>VendorID</code>: Unique identifier for each vendor</li>
    <li><code>FeeID</code>: Unique identifier for each fee</li>
    <li><code>FeePayable</code>: Amount of fee payable</li>
    <li><code>Card</code>: Type of card used (e.g., Virtual, Physical)</li>
    <li><code>MCC GroupName</code>: Merchant Category Code group name</li>
    <li><code>Channel</code>: Channel through which the transaction was made</li>
    <li><code>CardState</code>: State of the card (e.g., active, inactive)</li>
    <li><code>CardToken</code>: Tokenized representation of the card</li>
</ul>

<h3 id="contributing">Contributing</h3>
<p>Contributions are welcome! Please read the <a href="CONTRIBUTING.md">contributing guidelines</a> before you start.</p>

<h3 id="license">License</h3>
<p>This project is licensed under the MIT License. See the <a href="LICENSE">LICENSE</a> file for details.</p>

<h5 id="Queries"> Queries </h5>

<div class="container">
"""

def _render_query_section(number, question, frame, image_path):
    """Return the HTML column for one query: title, result table and plot image.

    Args:
        number: 1-based query number shown in the title and the image alt text.
        question: Human-readable description of the query.
        frame: DataFrame holding the query result.
        image_path: Path written into the ``src`` attribute of the ``<img>`` tag.

    Returns:
        The HTML fragment as a string.
    """
    # NOTE(review): as before, only the first two columns of a result are
    # rendered; wider results (e.g. a SELECT *) are truncated in the report.
    shown = min(len(frame.columns), 2)

    # Column names and cell values come from the data, so they are escaped
    # before being placed into the markup.
    header_cells = "".join(
        f"<th>{html.escape(str(column))}</th>" for column in frame.columns[:shown]
    )
    # .iloc is used for positional access: integer keys on a label-indexed
    # Series (row[0]) are deprecated in pandas.
    body_rows = "".join(
        "<tr>"
        + "".join(f"<td>{html.escape(str(value))}</td>" for value in row.iloc[:shown])
        + "</tr>\n"
        for _, row in frame.iterrows()
    )
    return f"""
        <div class="column">
            <div class="query">
                <div class="query-title">Query {number}: {html.escape(str(question))}</div>
                <table class="query-result">
                    <tr>{header_cells}</tr>
                    {body_rows}
                </table>
            </div>
            <div class="query-image">
                <img src="{html.escape(image_path)}" alt="Plot {number}">
            </div>
        </div>
        """


# Append one section per query to the report. Queries 2 and 4 (the COUNT(*)
# and AVG(Amount) queries) are left out of the report.
sections = []
for i, (query, question) in enumerate(queries, 1):
    if i in (2, 4):
        continue
    # Query 6's plot is stored as "output.png"; every other plot is "plot_<i>.jpg".
    image_name = "output.png" if i == 6 else f"plot_{i}.jpg"
    image_path = os.path.join(output_folder, image_name)
    sections.append(_render_query_section(i, question, results[i - 1], image_path))

# A single join avoids rebuilding the growing report string on every row.
html_content += "".join(sections)

# Terminate the flex container, the body and the document.
html_content = html_content + """
</div>
</body>
</html>
"""

# Write the finished report; the relative name resolves against the current
# working directory.
html_file_path = "banking_data_analysis.html"
with open(html_file_path, mode="w") as html_file:
    html_file.write(html_content)

# Report the absolute location so the file is easy to find.
saved_to = os.path.abspath(html_file_path)
print(f"HTML file saved to: {saved_to}")
