In [3]:
import requests
import pandas as pd
from datetime import datetime
from io import BytesIO
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas
from reportlab.lib.utils import ImageReader
import numpy as np
import os
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from PIL import Image
from matplotlib.backends.backend_pdf import PdfPages
import openai

# FRED API key
api_key = "acd3990b4d05ee7c381050179c32f840"

# Define the FRED series IDs for the United States economic indicators we will need
indicators = {
    "GDP": "GDP",
    "GDP Growth Rate": "A191RL1Q225SBEA",
    "Unemployment Rate": "UNRATE",
    "Inflation Rate": "CPIAUCSL",
    "Consumer Confidence Index": "UMCSENT",
    "Consumer Price Index": "CPIAUCSL",
    "Trade Balance": "NETEXP",
    "Budget Deficit/Surplus": "FYFSGDA188S",
    "Debt to GDP Ratio": "GFDEGDQ188S",
    "Labor Force Participation Rate": "CIVPART",
    "Federal Funds Rate": "FEDFUNDS",
    "10-Year Treasury Rate": "GS10",
    "Personal Consumption Expenditures": "PCE",
    "Retail Sales": "RSAFS",
    "Industrial Production Index": "INDPRO",
    "Housing Starts": "HOUST",
    "New Home Sales": "HSN1F",
    "Exports of Goods and Services": "EXPGS",
    "Imports of Goods and Services": "IMPGS",
    "Nonfarm Payrolls": "PAYEMS",
    "Average Hourly Earnings": "AHETPI",
    "Consumer Sentiment Index": "UMCSENT",
}

# The URL for FRED API
base_url = "https://api.stlouisfed.org/fred/series/observations"

# Dictionary to store indicator data in DataFrames
indicator_data_frames = {}

# Function to fetch data from FRED API with specified date range
def fetch_data(series_id):
    params = {
        "series_id": series_id,
        "api_key": api_key,
        "file_type": "json",
        "realtime_start": "2023-09-18",  # Start date (current date)
        "realtime_end": "2023-09-18",    # End date (current date)
    }
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        return None

# Fetch and store historical data for each indicator in different DataFrames
for indicator_name, series_id in indicators.items():
    data = fetch_data(series_id)
    if data and "observations" in data:
        dates = []
        values = []
        for observation in data["observations"]:
            values.append(observation["value"])
            dates.append(observation["date"])
        indicator_df = pd.DataFrame({"Date": dates, indicator_name: values})
        indicator_df["Date"] = pd.to_datetime(indicator_df["Date"])
        indicator_df.set_index("Date", inplace=True)
        indicator_data_frames[indicator_name] = indicator_df
    else:
        print(f"Failed to fetch data for {indicator_name}")
NewHomeSales = indicator_data_frames['New Home Sales']
GDP = indicator_data_frames['GDP']
GDPGrowth = indicator_data_frames['GDP Growth Rate']
UnEmploymentRate = indicator_data_frames['Unemployment Rate']
InflationRate = indicator_data_frames['Inflation Rate']
CCI = indicator_data_frames['Consumer Confidence Index']
CPI = indicator_data_frames['Consumer Price Index']
CSI = indicator_data_frames['Consumer Sentiment Index']
TradeBalance  = indicator_data_frames['Trade Balance']
BudgetDeficit = indicator_data_frames['Budget Deficit/Surplus']
DebtToGDP = indicator_data_frames['Debt to GDP Ratio']
LaborForceParticipation = indicator_data_frames['Labor Force Participation Rate']
FederalFundsRate = indicator_data_frames['Federal Funds Rate']
TenYearTreasuryRate = indicator_data_frames['10-Year Treasury Rate']
PersonalConsumptionExpenditures = indicator_data_frames['Personal Consumption Expenditures']
RetailSales = indicator_data_frames['Retail Sales']
IndustrialProductionIndex = indicator_data_frames['Industrial Production Index']
HousingStarts = indicator_data_frames['Housing Starts']
NewHomeSales = indicator_data_frames['New Home Sales']
ExportOfGoodsAndServices =  indicator_data_frames['Exports of Goods and Services']
ImportOfGoodsAndServices = indicator_data_frames['Imports of Goods and Services']
NonFarmPayroll =  indicator_data_frames['Nonfarm Payrolls']
AverageHourlyEarnings = indicator_data_frames['Average Hourly Earnings']

# Define a color palette for the plots
color_palette = px.colors.qualitative.Plotly

# line graphs with qualitative visibility
dataframes_to_plot = [
    NewHomeSales, GDP, GDPGrowth, UnEmploymentRate, InflationRate, CCI, CPI,
    CSI, TradeBalance, BudgetDeficit, DebtToGDP, LaborForceParticipation,
    FederalFundsRate, TenYearTreasuryRate, PersonalConsumptionExpenditures,
    RetailSales, IndustrialProductionIndex, HousingStarts, ExportOfGoodsAndServices,
    ImportOfGoodsAndServices, NonFarmPayroll, AverageHourlyEarnings
]

# Defining a function to convert Plotly figures to PNG images
def plotly_to_image(fig):
    img_data = fig.to_image(format="png")
    return ImageReader(BytesIO(img_data))

# Create a PDF document and set up the canvas
pdf_buffer = BytesIO()
c = canvas.Canvas(pdf_buffer, pagesize=letter)

toc_text = (
    "United States Economic Data Report Table of Contents\n\n"
    "Page 2: Table Of Contents\n"
    "Page 3: New Home Sales\n"
    "Page 4: GDP\n"
    "Page 5: GDP Growth Rate\n"
    "Page 6: Unemployment Rate\n"
    "Page 7: Inflation Rate\n"
    "Page 8: Consumer Confidence Index\n"
    "Page 9: Consumer Price Index\n"
    "Page 10: Consumer Sentiment Index\n"
    "Page 11: Trade Balance\n"
    "Page 12: Budget Deficit\n"
    "Page 13: Debt To GDP\n"
    "Page 14: Labor Force Participation\n"
    "Page 15: Federal Funds Rate\n"
    "Page 16: 10-Year Treasury Rate\n"
    "Page 17: Personal Consumption Expenditures\n"
    "Page 18: Retail Sales\n"
    "Page 19: Industrial Production Index\n"
    "Page 20: Housing Starts\n"
    "Page 21: Exports Of Goods And Services\n"
    "Page 22: Imports Of Goods And Services\n"
    "Page 23: Non-Farm Payrolls\n"
    "Page 24: Average Hourly Earnings\n"
)

# Add cover page with logos
logo_path1 = r"C:\Users\daddy\Downloads\NiSource Supply Chain Market Insights.png.png"
if os.path.exists(logo_path1):
    logo_width = 612
    logo_height = 792
    c.drawImage(logo_path1, 0, 0, width=logo_width, height=logo_height)

# Add page break after cover page
c.showPage()

# Add the table of contents on the second page
c.setFont("Helvetica-Bold", 14)
c.drawString(50, 750, "Table of Contents")
c.setFont("Helvetica", 12)

# Split the TOC text into lines and draw them
toc_lines = toc_text.split("\n")
y_pos = 730
for line in toc_lines:
    c.drawString(70, y_pos, line)
    y_pos -= 15

# Save the table of contents page
c.showPage()

openai.api_key = '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'

# Loop through the dataframes and generate the Plotly figures and PDF pages
for idx, df in enumerate(dataframes_to_plot):
    color = color_palette[idx % len(color_palette)]
    
    # Keep only the last 100 data points
    df = df.iloc[-10:]
    
    # Convert the data to numeric
    df[df.columns[0]] = pd.to_numeric(df[df.columns[0]], errors='coerce')

    scale_info = ""
    if df.columns[0] in ["GDP", "Trade Balance", "Budget Deficit/Surplus", "Personal Consumption Expenditures"]:
        scale_info = " (in billions)"
    elif df.columns[0] in ["Retail Sales"]:
        scale_info = " (in millions)"
    elif df.columns[0] in ["New Home Sales", "Housing Starts", "Exports of Goods and Services", "Imports of Goods and Services"]:
        scale_info = " (in thousands)"

    fig = px.line(
        df,
        x=df.index,
        y=df.columns[0],
        title=f"{df.columns[0]} Over Time{scale_info}",
        line_shape='linear',
        markers=True,
        template='plotly_dark',
        labels={'Date': 'Date', df.columns[0]: 'Value'},
        line_dash_sequence=['solid'],
    )
    
    # Generate a text prompt for mean and standard deviation
    mean_value = df[df.columns[0]].mean()
    std_value = df[df.columns[0]].std()
    prompt = f"Analyzing the {df.columns[0]} statistics: The mean value is {mean_value:.2f} and the standard deviation is {std_value:.2f}. Analyze how the last 48 data points compare to the 48 data points before it, highlighting any significant changes or trends. Also, discuss the potential economic effects of {df.columns[0]} on the economy and provide sentiment analysis, Also tell me if the data is normalized or not."
    # Generate analysis for each indicator
    response = openai.Completion.create(
        engine="text-davinci-002",
        prompt=prompt,
        max_tokens=500,
    )

    analysis = response.choices[0].text.strip()

    # Remove redundant and insignificant statements from analysis
    unique_lines = []
    prev_line = None
    for line in analysis.split('\n'):
        if line != prev_line and line.strip().endswith(".") and len(line.split()) > 5:
            unique_lines.append(line)
            prev_line = line

    # Draw the analysis text on the PDF page
    c.drawString(55, 50, "Intelligent Analysis:")
    y_text = 231

    for line in unique_lines:
        # Check if text exceeds the page width
        if c.stringWidth(line) <= 500:
            c.drawString(50, y_text, line)
            y_text -= 12
        else:
            # Split the line into multiple lines if it's too wide
            words = line.split()
            temp_line = ""
            for word in words:
                if c.stringWidth(temp_line + " " + word) <= 500:
                    temp_line += " " + word
                else:
                    c.drawString(50, y_text, temp_line)
                    y_text -= 12
                    temp_line = word
            if temp_line:
                c.drawString(50, y_text, temp_line)
                y_text -= 12

    fig.add_trace(go.Scatter(x=df.index, y=[mean_value] * len(df.index),
                             mode='lines', line=dict(color='yellow'), name='Mean'))
    fig.add_trace(go.Scatter(x=df.index, y=[mean_value + 2 * std_value] * len(df.index),
                             mode='lines', line=dict(color='green'), name='+2 Std Dev'))
    fig.add_trace(go.Scatter(x=df.index, y=[mean_value + 3 * std_value] * len(df.index),
                             mode='lines', line=dict(color='green', dash='dash'), name='+3 Std Dev'))
    fig.add_trace(go.Scatter(x=df.index, y=[mean_value - 2 * std_value] * len(df.index),
                             mode='lines', line=dict(color='red'), name='-2 Std Dev'))
    fig.add_trace(go.Scatter(x=df.index, y=[mean_value - 3 * std_value] * len(df.index),
                             mode='lines', line=dict(color='red', dash='dash'), name='-3 Std Dev'))
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.update_traces(line=dict(color=color, width=1), marker=dict(size=10))

    # Save the figure to a temporary image file
    temp_image_path = f"temp_image_{idx}.png"
    fig.write_image(temp_image_path)

    # Embed the image in the PDF and add a new page
    c.drawImage(temp_image_path, 50, 300, width=500, height=300)
    if idx < len(dataframes_to_plot) - 1:
        c.showPage()

    # Clean up the temporary image file
    os.remove(temp_image_path)

# Save and close the PDF
c.save()
pdf_buffer.seek(0)

# Save PDF Report
with open("economic_indicators.pdf", "wb") as f:
    f.write(pdf_buffer.read())



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



A value is