<a href="https://colab.research.google.com/github/dsfdev2023/studentEvaluatiuon/blob/main/Bank_Streamlit_Demo_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Install necessary packages:**



In [25]:
!pip install PyPDF2 tabulate streamlit



In [26]:
!pip install pyngrok



In [27]:
!pip install pandas openpyxl



In [28]:
!pip install altair



In [29]:
!pip install Whoosh



# **CODE FOR STREAMLIT**

In [46]:
%%writefile streamlit_app.py

import streamlit as st
import pandas as pd
import PyPDF2
import re
import os
import altair as alt
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.writing import BufferedWriter
from whoosh.qparser import QueryParser
from io import BytesIO
import base64

def extract_text_from_pdf(pdf_content):
    reader = PyPDF2.PdfReader(pdf_content)
    text = ''
    for page in reader.pages:
        text += page.extract_text()
    return text

def format_date(date_str):
    return f"{date_str[:2]}-{date_str[2:4]}-{date_str[4:]}"

def extract_info(documentText):
    name_match = re.search(r'M\s+([A-Z\s]+)', documentText)
    extractedName = name_match.group(1).strip() if name_match else None

    date_match = re.search(r'\b(\d{7,8})\b', documentText)
    extractedDate = format_date(date_match.group(1)) if date_match else None

    all_amount_matches = re.findall(r'(\d{1,3}(?:\s\d{3})*,\d{2})', documentText)
    extractedAmount = all_amount_matches[-1].replace(' ', '').replace(',', '.') if all_amount_matches else None

    return extractedName, extractedDate, extractedAmount

button_css = """
<style>
    .pdf-btn, .excel-btn {
        color: white !important;
    }

    .pdf-btn {
        background-color: #008CBA;
        padding: 14px 28px;
        font-size: 16px;
        cursor: pointer;
        border: none;
        border-radius: 8px;
        text-decoration: none;
        font-weight: bold;
        display: inline-block;
    }
    .pdf-btn:hover {
        background-color: #007B9A;
    }

    .excel-btn {
        background-color: #4CAF50;
        padding: 14px 28px;
        font-size: 16px;
        cursor: pointer;
        border: none;
        border-radius: 8px;
        text-decoration: none;
        font-weight: bold;
        display: inline-block;
    }
    .excel-btn:hover {
        background-color: #45a049;
    }
</style>
"""

def get_pdf_download_link(pdf_content, filename='document.pdf', text='Download PDF file'):
    b64 = base64.b64encode(pdf_content)
    return button_css + f'<a class="pdf-btn" href="data:application/pdf;base64,{b64.decode()}" download="{filename}">{text}</a>'

def get_table_download_link(df, filename="data.xlsx", text="Download Excel"):
    output = BytesIO()
    writer = pd.ExcelWriter(output, engine='openpyxl')
    df.to_excel(writer, index=False)
    writer.save()
    processed_data = output.getvalue()
    b64 = base64.b64encode(processed_data).decode()
    href = button_css + f'<a class="excel-btn" href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}">{text}</a>'
    return href

st.title("DSF Smart Doc: BMCE Case")

schema = Schema(title=TEXT(stored=True), content=TEXT(stored=True))
if not os.path.exists("indexdir"):
    os.mkdir("indexdir")
ix = create_in("indexdir", schema)

uploaded_files = st.file_uploader("Please Upload your PDF files", type=["pdf"], accept_multiple_files=True)

if uploaded_files:
    num_files = len(uploaded_files)
    st.success(f"{num_files} file{'s' if num_files > 1 else ''} successfully uploaded!")
    results = []

    writer = BufferedWriter(ix)
    for uploaded_file in uploaded_files:
        documentText = extract_text_from_pdf(uploaded_file)
        writer.add_document(title=uploaded_file.name, content=documentText)

        name, date, amount = extract_info(documentText)
        results.append([uploaded_file.name, name, date, amount])

    writer.commit()
    writer.close()

    headers = ['File Name', 'Name', 'Date', 'Amount']
    df = pd.DataFrame(results, columns=headers)
    st.write(df)
    st.markdown(get_table_download_link(df), unsafe_allow_html=True)

    df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y', errors='coerce')
    df['Amount'] = df['Amount'].str.replace(',', '.').astype(float)

    st.subheader("Amounts over Time by Client")
    for client in df['Name'].unique():
        client_data = df[df['Name'] == client]
        if len(client_data) > 1:
            line_chart = alt.Chart(client_data).mark_line(point=True, color='blue').encode(
                x=alt.X('Date:T', title='Date'),
                y=alt.Y('Amount:Q', title='Amount in Dirhams'),
                tooltip=[alt.Tooltip('Date:T', title='Date'), alt.Tooltip('Amount:Q', title='Amount (Dirhams)', format='.2f')]
            ).properties(
                title=f"Amounts over Time for {client}",
                width=800,
                height=600
            ).interactive()
            st.altair_chart(line_chart, use_container_width=True)
        else:
            st.write(f"{client} has only one transaction on {client_data['Date'].iloc[0].strftime('%Y-%m-%d')} with an amount of {client_data['Amount'].iloc[0]} Dirhams")

    st.subheader("Summary: Total Amounts by Client in Dirhams")
    grouped_data = df.groupby('Name').agg({'Amount': 'sum'}).reset_index()
    grouped_data['Amount'] = grouped_data['Amount'].apply(lambda x: f"{x:,.2f}")  # Format with 2 decimal places
    st.table(grouped_data)




st.subheader("Digital Archive")

# Input for searching keywords
search_term = st.text_input("Search keywords to find the corresponding PDFs")

if search_term:
    # Create a container for the search results
    results_container = st.container()

    with ix.searcher() as searcher:
        query = QueryParser("content", ix.schema).parse(search_term)
        results = searcher.search(query, limit=None)

        # Check if there are any results
        if len(results) == 0:
            results_container.error("No results found.")
        else:
            results_container.success(f"{len(results)} results found:")

            # Create a container for displaying the search results
            results_list = results_container.container()

            # Iterate through the search results and display them
            for hit in results:
                pdf_name = hit['title']
                uploaded_file = next(upload for upload in uploaded_files if upload.name == pdf_name)
                uploaded_file.seek(0)  # Reset the file pointer to the beginning
                pdf_content = uploaded_file.read()

                # Create a container for each search result
                result_item = results_list.container()

                # Display PDF Name and a download button for each result
                result_item.write(f"PDF Name: {pdf_name}")
                result_item.markdown(get_pdf_download_link(pdf_content, filename=pdf_name), unsafe_allow_html=True)


Overwriting streamlit_app.py


# **RUN STREAMLIT**

In [31]:
import os

# Download and unzip ngrok only if it doesn't exist yet
if not os.path.exists("ngrok-stable-linux-amd64.zip"):
    !wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
    !unzip -o ngrok-stable-linux-amd64.zip

# Authenticate with ngrok using your auth token
!./ngrok authtoken 2Wtkkzkovj51i2fQ8MUCQrf7DyT_7cft4ztcihL3M9MvpsKdC


Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [47]:
# Start ngrok for the Streamlit port 8501
get_ipython().system_raw('./ngrok http 8501 &')
!sleep 2  # Wait a couple of seconds for the tunnel to be established

# Fetch and print the public URL
public_url = !curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; tunnels = json.load(sys.stdin)['tunnels']; print(tunnels[0]['public_url'] if tunnels else '')"
print(public_url[0])

https://2891-34-170-223-197.ngrok-free.app


In [None]:
!streamlit run streamlit_app.py



Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.170.223.197:8501[0m
[0m
  writer.save()
  writer.save()
  writer.save()
  writer.save()
  writer.save()
  writer.save()
  writer.save()
