In [None]:

# Here's a summary of what Kafka does:

# 1. Publish-Subscribe Messaging System: Kafka operates as a distributed messaging system where producers publish 
# records (messages) to topics, and consumers subscribe to topics to receive and process these records.

# 2. Scalability and Fault-Tolerance: Kafka is designed to be highly scalable and fault-tolerant. It can handle large 
# volumes of data and distribute it across multiple nodes (brokers) in a cluster to ensure high availability and reliability.

# 3. Durable and Persistent: Kafka maintains a durable log of records, allowing data to be stored for a configurable 
# retention period. This enables consumers to replay messages from any point still within that retention period and 
# ensures that data is not lost even in the event of failures.

# 4. Real-time Stream Processing: Kafka supports real-time stream processing by providing APIs and libraries for building 
# stream processing applications. These applications can process data as it arrives, enabling real-time analytics, monitoring, 
# and alerting.

# 5. Integration with Ecosystem: Kafka integrates seamlessly with various data systems and frameworks, including Hadoop, 
# Spark, Flink, and others. It serves as a central data hub for ingesting, storing, and processing data from diverse sources.

# Overall, Kafka is widely used in modern data architectures for building scalable, reliable, and real-time data pipelines,
# enabling organizations to efficiently manage and process large streams of data.


In [None]:

# Try this: 
# In a Windows Command Prompt...
# First, start ZooKeeper using the command given below:
# C:\kafka_2.12-3.7.0\bin\windows\zookeeper-server-start.bat C:\kafka_2.12-3.7.0\config\zookeeper.properties


# Now open another command prompt and change the directory to the kafka folder. Run kafka server using the command:
# C:\kafka_2.12-3.7.0\bin\windows\kafka-server-start.bat C:\kafka_2.12-3.7.0\config\server.properties


In [2]:

# Anaconda Prompt:
# pip show kafka
# Name: kafka
# Version: 1.3.5
# Summary: Pure Python client for Apache Kafka
# Home-page: https://github.com/dpkp/kafka-python
# Author: Dana Powers
# Author-email: dana.powers@gmail.com
# License: Apache License 2.0
# Location: C:\Users\ryan_\anaconda3\Lib\site-packages
# Requires:
# Required-by:


# Anaconda Prompt: 
# pip install --upgrade kafka-python
# ...then restart the Spyder IDE so the upgraded package is loaded...

    
# Command Prompt (confirm that localhost resolves and responds):
# ping localhost

# Pinging MyDevice [::1] with 32 bytes of data:
# Reply from ::1: time<1ms
# Reply from ::1: time<1ms
# Reply from ::1: time<1ms
# Reply from ::1: time<1ms


In [1]:

# Setup is done. With ZooKeeper and the Kafka server from the steps above still running,
# run the script below...

from kafka import KafkaProducer
from yahoo_fin import stock_info
import time

# Initialize Kafka producer (expects a broker listening on localhost:9092)
producer = KafkaProducer(bootstrap_servers='localhost:9092')

# Define Kafka topic
topic = 'stock_prices'

# Tunables for the polling loop
SYMBOL = 'AAPL'               # Example: Apple Inc. (AAPL)
POLL_INTERVAL_SECONDS = 60    # Delay between two fetch attempts
SEND_TIMEOUT_SECONDS = 10     # Max wait for the broker to acknowledge one record

# Fetch live stock price updates and produce them to the Kafka topic.
# The loop runs until the cell is interrupted; the outer try/finally makes sure
# the producer is closed (and its buffered records sent) when that happens.
try:
    while True:
        try:
            # Fetch stock price data from Yahoo Finance
            stock_data = stock_info.get_quote_table(SYMBOL)

            # send() is asynchronous and only queues the record; waiting on the
            # returned future surfaces delivery errors here, so the message
            # below is printed only after the broker has acknowledged the record.
            # The payload stays str(dict) encoded as UTF-8, as before.
            producer.send(topic, str(stock_data).encode('utf-8')).get(timeout=SEND_TIMEOUT_SECONDS)
            print("Stock price data published to Kafka topic:", stock_data)
        except Exception as e:
            print("Error:", e)

        # Wait before the next attempt on BOTH success and failure. Previously the
        # sleep lived inside the try, so any error skipped it and the loop retried
        # immediately, hammering Yahoo Finance / the broker in a tight loop.
        time.sleep(POLL_INTERVAL_SECONDS)
finally:
    # Runs on interrupt as well; the timeout keeps shutdown from hanging
    # indefinitely if the broker is unreachable.
    producer.close(timeout=SEND_TIMEOUT_SECONDS)
        

In [4]:

# RESULT...
# (the next line is part of the captured output, printed before the first publish message)
# data = data.append(quote_price)
# Stock price data published to Kafka topic: {'1y Target Est': 201.28, '52 Week Range': '155.98 - 199.62', 'Ask': 
# '172.45 x 100', 'Avg. Volume': 60617903.0, 'Beta (5Y Monthly)': 1.29, 'Bid': '172.28 x 100', "Day's Range": 
# '170.06 - 173.05', 'EPS (TTM)': 6.42, 'Earnings Date': 'May 02, 2024 - May 06, 2024', 'Ex-Dividend Date': 
# 'Feb 09, 2024', 'Forward Dividend & Yield': '0.96 (0.56%)', 'Market Cap': '2.66T', 'Open': 171.86, 'PE Ratio (TTM)': 
# 26.83, 'Previous Close': 171.37, 'Quote Price': 172.27999877929688, 'Volume': 70618124.0}
    

In [None]:

# Live Price:
print(stock_info.get_live_price('AAPL'))



# Historical Price Data:
# Window runs from 12 months before today through tomorrow.
# NOTE(review): the +1 day on `end` presumably ensures today's row is included — confirm
# how yahoo_fin treats end_date.
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

# Take "today" once so both window bounds are derived from the same day.
today = datetime.now().date()
# Both bounds are date objects (previously start was a formatted string and end a date);
# they print in the same YYYY-MM-DD form as before.
start = today - relativedelta(months=12)
end = today + timedelta(days=1)
print(start)
print(end)

# Full available history first, then only the 12-month window.
print(stock_info.get_data('AAPL'))
print(stock_info.get_data('AAPL', start_date=start, end_date=end))


In [None]:

# RESULT...
# 172.27999877929688
# 2023-03-23
# 2024-03-24
#                   open        high         low  ...    adjclose     volume  ticker
# 1980-12-12    0.128348    0.128906    0.128348  ...    0.099192  469033600    AAPL
# 1980-12-15    0.122210    0.122210    0.121652  ...    0.094017  175884800    AAPL
# 1980-12-16    0.113281    0.113281    0.112723  ...    0.087117  105728000    AAPL
# 1980-12-17    0.115513    0.116071    0.115513  ...    0.089273   86441600    AAPL
# 1980-12-18    0.118862    0.119420    0.118862  ...    0.091861   73449600    AAPL
#                ...         ...         ...  ...         ...        ...     ...
# 2024-03-18  175.570007  177.710007  173.520004  ...  173.720001   75604200    AAPL
# 2024-03-19  174.339996  176.610001  173.029999  ...  176.080002   55215200    AAPL
# 2024-03-20  175.720001  178.669998  175.089996  ...  178.669998   53423100    AAPL
# 2024-03-21  177.050003  177.490005  170.839996  ...  171.369995  106181300    AAPL
# 2024-03-22  171.759995  173.050003  170.059998  ...  172.279999   71106600    AAPL

# [10910 rows x 7 columns]
#                   open        high         low  ...    adjclose     volume  ticker
# 2023-03-23  158.830002  161.550003  157.679993  ...  158.086166   67622100    AAPL
# 2023-03-24  158.860001  160.339996  157.850006  ...  159.399170   59196500    AAPL
# 2023-03-27  159.940002  160.770004  157.869995  ...  157.439636   52390300    AAPL
# 2023-03-28  157.970001  158.490005  155.979996  ...  156.812958   45992200    AAPL
# 2023-03-29  159.369995  161.050003  159.350006  ...  159.916412   51305700    AAPL
#                ...         ...         ...  ...         ...        ...     ...
# 2024-03-18  175.570007  177.710007  173.520004  ...  173.720001   75604200    AAPL
# 2024-03-19  174.339996  176.610001  173.029999  ...  176.080002   55215200    AAPL
# 2024-03-20  175.720001  178.669998  175.089996  ...  178.669998   53423100    AAPL
# 2024-03-21  177.050003  177.490005  170.839996  ...  171.369995  106181300    AAPL
# 2024-03-22  171.759995  173.050003  170.059998  ...  172.279999   71106600    AAPL

# [252 rows x 7 columns]
