In [2]:
# Function to download the model from the Hugging Face model hub
from huggingface_hub import hf_hub_download

# Importing the Llama class from the llama_cpp module
from llama_cpp import Llama

# Importing the library for data manipulation
import pandas as pd

from tqdm import tqdm # For progress bar related functionalities
# Register tqdm with pandas; this is what makes `Series.progress_apply`
# (used for the per-week model calls further down) available.
tqdm.pandas()

In [3]:
# Load the raw stock-news dataset; the path is relative to the working directory.
stock_news = pd.read_csv("stock_news.csv")

In [4]:
# Work on a copy so the frame loaded from disk (`stock_news`) stays untouched.
data = stock_news.copy()

In [5]:
# Hugging Face Hub repository that hosts the model, and the file to fetch from it.
model_name_or_path = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
model_basename = "mistral-7b-instruct-v0.2.Q6_K.gguf"

# Download the model file and keep the local path returned by the Hub client.
# The identifiers are passed through the variables above instead of being
# repeated as literals, so the model can be swapped in a single place.
model_path = hf_hub_download(
    repo_id=model_name_or_path,
    filename=model_basename,
)

mistral-7b-instruct-v0.2.Q6_K.gguf:   0%|          | 0.00/5.94G [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [6]:
# Load the downloaded GGUF model through llama-cpp-python.
# NOTE(review): n_gpu_layers presumably only takes effect when llama-cpp-python
# was built with GPU support — confirm for the runtime in use.
# NOTE(review): n_ctx has to cover the instruction prompt, the news text and the
# generated tokens together; the first week's news alone is 2611 space-separated
# words — TODO confirm 4500 is enough for every week.
llm = Llama(
    model_path=model_path, # Path to the model
    n_gpu_layers=100, #Number of layers transferred to GPU
    n_ctx=4500, #Context window
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 0 | VSX = 0 | 


In [7]:
# Parse 'Date' into datetimes so it can serve as the key of the time-based
# Grouper used for the weekly grouping below.
data["Date"] = pd.to_datetime(data['Date'])

In [8]:
# Bucket the rows into weekly bins keyed on 'Date'. With freq='W' each bin is
# labelled by its week-ending date (e.g. 2019-01-06 in the displayed output).
# At this point `weekly_grouped` is a GroupBy object, not yet a DataFrame.
weekly_grouped = data.groupby(pd.Grouper(key='Date', freq='W'))

In [9]:
# Aggregate the grouped data on a weekly basis:
# concatenate all 'News' values of a week into a single string separated by ' || '.
# Missing values are skipped and entries are cast to str, so a NaN (or any other
# non-string cell) cannot make str.join raise a TypeError.
weekly_grouped = weekly_grouped.agg(
    {
        'News': lambda x: ' || '.join(x.dropna().astype(str))
    }
).reset_index()  # turn the weekly 'Date' index back into a regular column

print(weekly_grouped.shape)

(18, 2)


In [10]:
# Display the weekly frame: one row per week-ending 'Date' with that week's joined 'News'.
weekly_grouped

Unnamed: 0,Date,News
0,2019-01-06,The tech sector experienced a significant dec...
1,2019-01-13,Sprint and Samsung plan to release 5G smartph...
2,2019-01-20,The U.S. stock market declined on Monday as c...
3,2019-01-27,"The Swiss National Bank (SNB) governor, Andre..."
4,2019-02-03,Caterpillar Inc reported lower-than-expected ...
5,2019-02-10,"The Dow Jones Industrial Average, S&P 500, an..."
6,2019-02-17,"This week, the European Union's second highes..."
7,2019-02-24,This news article discusses progress towards ...
8,2019-03-03,The Dow Jones Industrial Average and other ma...
9,2019-03-10,"Spotify, the world's largest paid music strea..."


In [11]:
# Copy the weekly frame; the model responses are added to this copy as new
# columns, leaving `weekly_grouped` itself unchanged.
data_1 = weekly_grouped.copy()

In [12]:
# defining a function to parse the JSON output from the model
def extract_json_data(json_str):
    """Extract and parse the first JSON object embedded in a model response.

    The model may surround its JSON answer with extra text. Decoding therefore
    starts at the first '{' and stops at the end of that JSON object, so any
    trailing text (even text that itself contains braces) is ignored.

    Args:
        json_str: Raw text returned by the model.

    Returns:
        The decoded JSON object as a dict. An empty dict is returned, after
        printing a message, when the input is not a string, contains no '{',
        or the text starting at the first '{' is not valid JSON.
    """
    import json

    # Non-string cells (e.g. None/NaN) are treated like a response without JSON
    # instead of raising AttributeError on .find().
    json_start = json_str.find('{') if isinstance(json_str, str) else -1
    if json_start == -1:
        print(f"Warning: JSON object not found in response: {json_str}")
        return {}

    try:
        # raw_decode parses exactly one JSON value beginning at json_start and
        # reports where it ended; whatever follows is deliberately discarded.
        data_dict, _ = json.JSONDecoder().raw_decode(json_str, json_start)
        return data_dict
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return {}

In [13]:
#Defining the response function
def response_mistral_1(prompt, news, max_tokens=512, temperature=0.7, top_p=0.9, top_k=50):
    """Run the loaded model on `news` using the instructions in `prompt`.

    The instruction text and the news are wrapped in an [INST] ... [/INST]
    block and sent to the module-level `llm` object created earlier in the
    notebook.

    Args:
        prompt: Instruction text describing the task and the output format.
        news: News text to analyse; inserted after "News Articles:".
        max_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature passed to the model.
        top_p: Nucleus-sampling threshold passed to the model.
        top_k: Top-k sampling cut-off passed to the model.

    Returns:
        The generated text of the first choice in the model output.
    """
    model_output = llm(
      f"""
      [INST]
      {prompt}
      News Articles: {news}
      [/INST]
      """,
      max_tokens=max_tokens,
      temperature=temperature,
      top_p=top_p,
      top_k=top_k,
      echo=False,  # return only the completion, not the prompt itself
    )

    return model_output["choices"][0]["text"]

In [14]:
# Use the first week's concatenated news as a sample input for trying out the prompt.
news = data_1.loc[0, 'News']

In [15]:
# Rough size check: number of space-separated words (not model tokens),
# followed by the sample text itself.
print(len(news.split(' ')))
news

2611




In [18]:
# Instruction prompt for the weekly summarisation task. It asks the model for a
# JSON object with the keys "Positive Events" and "Negative Events"; those keys
# become the column names when the parsed responses are expanded with
# pd.json_normalize later in the notebook.
prompt = """
You are an expert data analyst specializing in news content analysis.

Task: Summarize the provided news content by identifying the top three positive and negative events that are most likely to impact the price of the stock.

Instructions:
1. Read the news content carefully.
2. Identify the key positive and negative events in the content.
3. Determine how these events could impact stock prices.
4. Summarize the key positive and negative events, ensuring clarity and relevance.
5. Ensure the events are ordered by their potential impact on the stock.

Output Format: Return the output in JSON format with two keys:
- "Positive Events": A list of the top three positive events.
- "Negative Events": A list of the top three negative events.

Example Output:
{
    "Positive Events": ["Positive event 1", "Positive event 2", "Positive event 3"],
    "Negative Events": ["Negative event 1", "Negative event 2", "Negative event 3"]
}

"""

In [19]:
%%time
# Trial run on the single sample week before applying the prompt to every row.
summary = response_mistral_1(prompt, news)
print(summary)

Llama.generate: prefix-match hit


 {
          "Positive Events": [
            "Roku Inc announced plans to offer premium video channels on a subscription basis through its free streaming service, The Roku Channel.",
            "The Supreme Court will review Broadcom's appeal in a shareholder lawsuit over the 2015 acquisition of Emulex.",
            "The Chinese central bank announced a fifth reduction in the required reserve ratio (RRR) for banks, freeing up approximately 116.5 billion yuan for new lending."
          ],
          "Negative Events": [
            "Apple cut its fiscal first quarter revenue forecast from $89-$93 billion to $84 billion due to weaker demand in China and fewer iPhone upgrades.",
            "Apple's underperformance in Q1 triggered risk aversion mood in markets, causing a drop in USD JPY pair and a gain in Japanese yen."
          ]
       }
CPU times: total: 30min 59s
Wall time: 5min 2s


In [20]:
%%time
# Call the model once per weekly row (with a progress bar) and store the raw
# text response in the 'Key Events' column. One model call per row makes this
# the slow step of the notebook.
data_1['Key Events'] = data_1['News'].progress_apply(lambda x: response_mistral_1(prompt,x))

  0%|          | 0/18 [00:00<?, ?it/s]Llama.generate: prefix-match hit
 11%|█         | 2/18 [01:01<08:10, 30.63s/it]Llama.generate: prefix-match hit
 17%|█▋        | 3/18 [04:06<23:43, 94.88s/it]Llama.generate: prefix-match hit
 22%|██▏       | 4/18 [07:10<29:53, 128.12s/it]Llama.generate: prefix-match hit
 28%|██▊       | 5/18 [09:39<29:19, 135.38s/it]Llama.generate: prefix-match hit
 33%|███▎      | 6/18 [12:57<31:16, 156.34s/it]Llama.generate: prefix-match hit
 39%|███▉      | 7/18 [15:05<26:58, 147.14s/it]Llama.generate: prefix-match hit
 44%|████▍     | 8/18 [17:26<24:13, 145.33s/it]Llama.generate: prefix-match hit
 50%|█████     | 9/18 [18:56<19:14, 128.23s/it]Llama.generate: prefix-match hit
 56%|█████▌    | 10/18 [20:45<16:16, 122.09s/it]Llama.generate: prefix-match hit
 61%|██████    | 11/18 [22:13<13:01, 111.69s/it]Llama.generate: prefix-match hit
 67%|██████▋   | 12/18 [24:14<11:27, 114.58s/it]Llama.generate: prefix-match hit
 72%|███████▏  | 13/18 [26:34<10:11, 122.38s/it]

CPU times: total: 3h 30min 5s
Wall time: 37min 56s





In [21]:
# Show the first 5 raw model responses stored in the 'Key Events' column.
data_1["Key Events"].head()

0     {\n          "Positive Events": [\n          ...
1     {\n          "Positive Events": [\n          ...
2     {\n          "Positive Events": [\n          ...
3     {\n          "Positive Events": [\n          ...
4     {\n          "Positive Events": [\n          ...
Name: Key Events, dtype: object

In [22]:
# Parse each raw response into a dict; extract_json_data returns {} for a
# response from which no JSON object could be parsed.
data_1['model_response_parsed'] = data_1['Key Events'].apply(extract_json_data)
data_1.head()

Unnamed: 0,Date,News,Key Events,model_response_parsed
0,2019-01-06,The tech sector experienced a significant dec...,"{\n ""Positive Events"": [\n ...",{'Positive Events': ['Roku Inc announced plans...
1,2019-01-13,Sprint and Samsung plan to release 5G smartph...,"{\n ""Positive Events"": [\n ...",{'Positive Events': ['Sprint and Samsung plann...
2,2019-01-20,The U.S. stock market declined on Monday as c...,"{\n ""Positive Events"": [\n ...",{'Positive Events': ['Dialog Semiconductor rep...
3,2019-01-27,"The Swiss National Bank (SNB) governor, Andre...","{\n ""Positive Events"": [\n ...",{'Positive Events': ['IBM's stock price increa...
4,2019-02-03,Caterpillar Inc reported lower-than-expected ...,"{\n ""Positive Events"": [\n ...",{'Positive Events': ['Apple reported higher-th...


In [23]:
# Expand the parsed dicts into their own DataFrame, one column per JSON key
# ('Positive Events' and 'Negative Events' in the displayed output).
model_response_parsed = pd.json_normalize(data_1['model_response_parsed'])
model_response_parsed.head()

Unnamed: 0,Positive Events,Negative Events
0,[Roku Inc announced plans to offer premium vid...,[Apple lowered its fiscal Q1 revenue guidance ...
1,[Sprint and Samsung planning to release 5G sma...,[Geely forecasting flat sales for 2019 due to ...
2,[Dialog Semiconductor reported fourth quarter ...,[China's exports and imports dropped unexpecte...
3,[IBM's stock price increased after hours due t...,[The Swiss National Bank governor emphasized t...
4,[Apple reported higher-than-expected revenue a...,[Caterpillar Inc reported lower-than-expected ...


In [24]:
# Attach the parsed event columns to the weekly data, drop the intermediate
# model columns and give the remaining columns their report names.
# Columns are renamed by NAME rather than by position: json_normalize orders
# columns by the order in which keys appear in the parsed responses, so a
# positional relabel would silently swap the positive/negative labels if the
# model emitted "Negative Events" first, and would raise if it emitted an
# extra key.
final_output = (
    pd.concat([data_1.reset_index(drop=True), model_response_parsed], axis=1)
    .drop(columns=['Key Events', 'model_response_parsed'])
    .rename(
        columns={
            'Date': 'Week End Date',
            'Positive Events': 'Week Positive Events',
            'Negative Events': 'Week Negative Events',
        }
    )
)

final_output.head()

Unnamed: 0,Week End Date,News,Week Positive Events,Week Negative Events
0,2019-01-06,The tech sector experienced a significant dec...,[Roku Inc announced plans to offer premium vid...,[Apple lowered its fiscal Q1 revenue guidance ...
1,2019-01-13,Sprint and Samsung plan to release 5G smartph...,[Sprint and Samsung planning to release 5G sma...,[Geely forecasting flat sales for 2019 due to ...
2,2019-01-20,The U.S. stock market declined on Monday as c...,[Dialog Semiconductor reported fourth quarter ...,[China's exports and imports dropped unexpecte...
3,2019-01-27,"The Swiss National Bank (SNB) governor, Andre...",[IBM's stock price increased after hours due t...,[The Swiss National Bank governor emphasized t...
4,2019-02-03,Caterpillar Inc reported lower-than-expected ...,[Apple reported higher-than-expected revenue a...,[Caterpillar Inc reported lower-than-expected ...
