In [1]:
from pypdf import PdfReader
from openai import OpenAI
import json
from pathlib import Path
import csv

import os

# The API key is read from the environment so that no credential is stored in
# the notebook (or in its version history / shared copies).
# Set it before starting the kernel, e.g. `export DEEPSEEK_API_KEY=...`.
# NOTE(review): the key that was previously hardcoded here should be treated as
# leaked and rotated.
API_KEY = os.environ.get('DEEPSEEK_API_KEY', '')

In [2]:
def make_prompt_dividend_detection():
    """Return the system prompt used for the per-page dividend-detection request.

    The prompt instructs the model to answer in JSON with the keys
    ``is_dividend_mentioned`` and ``dividend_amount``.
    """
    # One entry per prompt line; the empty first and last entries reproduce the
    # leading and trailing newline of the prompt text.
    prompt_lines = (
        "",
        "You are a financial expert.",
        "",
        "Go through this content and check if there is any mention of dividends",
        "",
        "Your task is to find the dividend the company pays.",
        "",
        "Your output must be in JSON format with the following keys:",
        "",
        "{",
        '  "is_dividend_mentioned": true/false,',
        '  "dividend_amount": "..."',
        "}",
        "",
        "Avoid making assumptions beyond the text.",
        "",
    )
    return "\n".join(prompt_lines)

def make_prompt_dividend_calculation():
    """Return the system prompt used for the final dividend-amount request.

    The prompt instructs the model to answer in JSON with the single key
    ``dividend_amount_paid``.
    """
    # One entry per prompt line; the empty first and last entries reproduce the
    # leading and trailing newline of the prompt text.
    prompt_lines = (
        "",
        "You are a financial expert.",
        "",
        "Go through this content and assimilate all the information about dividends",
        "",
        "Your task is to find the dividend the company pays in all forms.",
        "",
        "Your output must be in JSON format with the following keys:",
        "",
        "{",
        '  "dividend_amount_paid": "..."',
        "}",
        "",
        "Avoid making assumptions beyond the text. Dont put anything extra, just one value",
        "",
    )
    return "\n".join(prompt_lines)

In [3]:
def make_dividend_context(res_dic):
    """Concatenate the text of every page flagged as mentioning dividends.

    Parameters
    ----------
    res_dic : list[dict] | tuple[dict] | dict
        Normally the sequence of per-page result dicts, each carrying the keys
        ``'is_dividend_mentioned'`` and ``'page_text'``.
        For backward compatibility with callers that pass a single result
        dict, any argument that is not a list/tuple makes the function fall
        back to the module-level ``results`` list, which is the only thing the
        previous implementation ever read (it ignored its argument).

    Returns
    -------
    str
        The ``'page_text'`` values of all entries whose
        ``'is_dividend_mentioned'`` is ``True``, joined in order; ``''`` when
        nothing qualifies.
    """
    if isinstance(res_dic, (list, tuple)):
        pages = res_dic
    else:
        # Legacy call style: read the notebook-global list if it exists.
        pages = globals().get('results', [])

    return ''.join(
        page.get('page_text') or ''
        for page in pages
        # Entries lacking the flag (or with a non-boolean value) are skipped
        # instead of raising KeyError.
        if page.get('is_dividend_mentioned') is True
    )
            
                

In [41]:
# Extract the dividend paid per year from the annual-report PDFs.
# For each year not yet present in the CSV:
#   1. every page containing 'divi' is sent to the model for dividend detection,
#   2. the text of the flagged pages is concatenated and sent for a single amount,
#   3. the amount is appended to the CSV immediately, so an interrupted run
#      resumes where it stopped.
output_csv = Path("dividend_history.csv")

# Load existing data if file exists
existing_data = {}
if output_csv.exists():
    with open(output_csv, mode='r', newline='') as f:
        # Named csv_reader so it is not confused with the PdfReader bound to
        # `reader` further down.
        csv_reader = csv.DictReader(f)
        for row in csv_reader:
            try:
                existing_data[int(row['year'])] = float(row['dividend_amount_paid'])
            except (KeyError, TypeError, ValueError):
                # A damaged row must not block the whole run; that year is
                # simply processed again.
                print(f"Ignoring malformed CSV row: {row}")


api_client = OpenAI(api_key=API_KEY, base_url="https://api.deepseek.com")
years = list(range(2015, 2025))


def _request_json_text(system_prompt, user_text):
    """Send one system+user exchange to the model and return the raw reply text."""
    response = api_client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_text},
        ],
        response_format={'type': 'json_object'},
    )
    return response.choices[0].message.content


def _clean_amount(raw):
    """Return `raw` as a comma-free numeric string, or None if it is not a number."""
    if raw is None:
        return None
    cleaned = str(raw).replace(',', '').strip()
    try:
        float(cleaned)
    except ValueError:
        return None
    return cleaned


for year in years:
    # Skip if already processed
    if year in existing_data:
        print(f"Skipping {year} - already in CSV")
        continue
    print(year)
    file_name = f'04-annual-financial-english-{year}.pdf'
    try:
        reader = PdfReader(f"ZHCD.QA/{file_name}")
    except Exception as exc:
        # Report why the file could not be opened instead of hiding it; a bare
        # `except:` would also swallow KeyboardInterrupt.
        print(f'issues reading {file_name}: {exc}')
        continue
    print(len(reader.pages))

    system_prompt = make_prompt_dividend_detection()

    results = []
    for page_num, page in enumerate(reader.pages):
        print(page_num)
        text = page.extract_text() or ''
        # Cheap pre-filter: only near-empty pages and pages without 'divi'
        # are skipped before spending an API call.
        if len(text) < 50 or 'divi' not in text.lower():
            continue

        try:
            dic_res = json.loads(_request_json_text(system_prompt, text))
        except (json.JSONDecodeError, TypeError) as exc:
            # Empty or non-JSON reply: skip this page, keep the run alive.
            print(f"Unparseable model reply for page {page_num}: {exc}")
            continue
        dic_res['page_num'] = page_num
        dic_res['page_text'] = text
        results.append(dic_res)
        print('*' * 10)

    # Pass the full list of page results (not just the last page's dict).
    context_string = make_dividend_context(results)
    if not context_string:
        # Nothing to summarise; the year is left out of the CSV so it is
        # retried on the next run.
        print(f"No dividend pages detected for {year} - skipping")
        continue

    system_prompt = make_prompt_dividend_calculation()
    reply_text = _request_json_text(system_prompt, context_string)
    print(reply_text)
    try:
        dic_res = json.loads(reply_text)
    except (json.JSONDecodeError, TypeError) as exc:
        print(f"Unparseable model reply for {year}: {exc}")
        continue

    # The model may answer with a string containing thousands separators, a
    # bare number, or something non-numeric; only a clean number is saved.
    dividend_amount_paid = _clean_amount(dic_res.get('dividend_amount_paid'))
    if dividend_amount_paid is None:
        print(f"Could not parse dividend amount for {year}: {dic_res}")
        continue
    print(float(dividend_amount_paid))

    # Append to CSV after each successful year
    write_header = (not output_csv.exists()) or output_csv.stat().st_size == 0
    with open(output_csv, mode='a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['year', 'dividend_amount_paid'])
        if write_header:  # Write header if file is empty
            writer.writeheader()
        writer.writerow({
            'year': year,
            'dividend_amount_paid': dividend_amount_paid
        })

    print(f"Saved {year}: {dividend_amount_paid}")


Ignoring wrong pointing object 7 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 23 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 38 0 (offset 0)
Ignoring wrong pointing object 67 0 (offset 0)
Ignoring wrong pointing object 98 0 (offset 0)
Ignoring wrong pointing object 100 0 (offset 0)
Ignoring wrong pointing object 200 0 (offset 0)


Skipping 2015 - already in CSV
Skipping 2016 - already in CSV
Skipping 2017 - already in CSV
Skipping 2018 - already in CSV
2019
issues reading
Skipping 2020 - already in CSV
2021
issues reading
2022
51
0
1
2
3
4
**********
5
6
7
8
9
**********
10
**********
11
**********
12
13
**********
14
15
16
17
18
19
20
21
22
23
**********
24
25
**********
26
27
28
**********
29
**********
30
31
32
33
34
35
36
37
**********
38
39
40
**********
41
**********
42
**********
43
**********
44
45
46
47
48
**********
49
50
{
  "dividend_amount_paid": "169,452,998"
}
169452998.0
Saved 2022: 169452998
2023
50
0
1
2
3
4
**********
5
6
7
8
9
**********
10
**********
11
**********
12
13
**********
14
15
16
17
18
19
20
21
22
23
**********
24
25
**********
26
27
28
**********
29
**********
30
31
32
33
34
35
36
37
**********
38
39
40
**********
41
**********
42
**********
43
**********
44
45
46
47
48
**********
49
{
  "dividend_amount_paid": "169,713,695"
}
169713695.0
Saved 2023: 169713695
2024
44
0
1
2
3
4
**

In [24]:
# Inspect the per-page detection results; `results` is rebound to a fresh list
# for each year by the extraction loop, so this shows the last year processed.
results

[{'is_dividend_mentioned': False,
  'dividend_amount': None,
  'page_num': 4,
  'page_text': 'Independent auditors’ report (continued) \nTo the Shareholders of Zad Holding Company Q.P.S.C. \nOther Information \nThe Board of Directors is responsible for the other information. The other information comprises \nthe information included in the Company’s Annual Report but does not include the consolidated \nfinancial statements and our auditors’ report thereon. The Annual report is expected to be made \navailable to us after the date of this auditors’ report. \nOur opinion on the consolidated financial statements does not cover the other information  and we \ndo not and will not express any form of assurance conclusion thereon.  \nIn connection with our audit of the consolidated financial statements, our responsibility is to read \nthe other information identified above and, in doing so, consider whether th e other information is \nmaterially inconsistent with the consolidated financial sta

In [23]:
# Inspect the concatenated page text that the extraction loop sends with the
# dividend-calculation prompt.
context_string

"Zad Holding Company Q.P.S.C. \n \nNotes to the consolidated financial statements \nAs at and for the year ended 31 December 2022 \n22 \n \n         \n3. Significant accounting policies (continued) \n \ni) Financial instruments (continued) \n \nClassification and subsequent measurement of financial assets  (continued)  \n   \nTransfers of financial assets to third parties in transactions that do not qualify for derecognition are not \nconsidered sales for this purpose, consistent with the Group’s continuing recognition of the assets.   \n \nFinancial assets that are held for trading or are managed and whose performance is evaluated on a  \nfair value basis are measured at FVTPL. \n    \nFinancial assets – Assessment whether contractual cash flows are Solely Payments of Principal and Interest\n         \nFor the purposes of this assessment, ‘principal’ is defined as the fair value of the financial asset on initial \nrecognition. ‘Interest’ is defined as consideration for the time value 

In [None]:
# Show the amount from the model's final reply. Relies on `dic_res` left over
# from the extraction loop; if the loop stopped mid-year, `dic_res` is a
# per-page result and has no 'dividend_amount_paid' key.
print(dic_res['dividend_amount_paid'])

In [39]:
# Manual probe: extract the text of the page at index 9 from whatever PDF
# `reader` currently refers to (the last one opened by the extraction loop).
# This rebinds the globals `page` and `text` used by that loop.
page = reader.pages[9]
        
text=page.extract_text()

In [40]:
# Display the most recently extracted page text.
text

'Zad Holding Company Q.P.S.C. \n \nConsolidated statement of changes in equity \nFor the year ended 31 December 2022 In Qatari Riyals \n8 \n \n  Share  Legal  *Capital  **Fair value  Retained  Total  capital  reserve  reserve  reserve  earnings   \n \nBalance at 1 January 2021 236,997,200  563,120,753  15,000,000  17,723,138  717,929,595  1,550,770,686 \nImpact of restatements (Note 38) -  -  -  -  (206,143,927)  (206,143,927) \nRestated balances at 1 January 2021 236,997,200  563,120,753  15,000,000  17,723,138  511,785,668  1,344,626,759             \nProfit for the year (Restated) -  -  -  -  193,943,512  193,943,512 \nOther comprehensive income (Note 9) -  -  -  (49,150,541)  -  (49,150,541) \nGain transferred on disposal of investment \nsecurities – ‘At FVOCI’ -  -  -  (186,109)  186,109  - \nContribution to social and sports fund -  -  -  -  (4,751,152)  (4,751,152) \nBonus shares (Note 19) 23,699,720  -  -  -  (23,699,720)  - \nDividend distribution (Note 22) -  -  -  -  (165,89