# Getting news from Tuko News

## Schedule

- ensure that data mining runs every hour 

## Request

- extract the data from the API

In [1]:
import csv
import requests

In [16]:
from bs4 import BeautifulSoup
import schedule
import time
import re

In [36]:
def fetch_append_data(link, name, c):
    '''
    fetch_append_data
    
    A function that collects information from a link and appends it to a csv file
    
    input : link 
        a url that has the target information
    output: name.csv
        a file of the type <name>.csv
    '''
    url = link
    d = r'/'+ c + '/(\d+)-'
    
    try:
#         fetch content
        response = requests.get(url)
#     if there is no response or a http error
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser') # parse the html
        
        data = [] # where the extracted data will be placed 
        
        for news in soup.find_all("article"):
            title_tag = news.find('span')
            title = title_tag.get_text().strip() if title_tag else "Title Not Found"
            
            image_tag = news.find('img')
            image = image_tag['src'] if image_tag else "Image Not Found"
            
            url_tag = news.find('a')
            url = url_tag['href'] if url_tag else "URL Not Found"
            
            post_id_match = re.search(d, url)
            if post_id_match:
                post_id = int(post_id_match.group(1))
            else:
                post_id = None
            time_tag = news.find('time')
            time_posted = time_tag['datetime'] if time_tag else "Time Not Found"
            
            data.append([post_id, title, image, url, time_posted])
        
        
#         append to csv
        csv_file = str(name) + ".csv" 
        with open(csv_file, 'a', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            for row in data:
                writer.writerow(row)
        
        print("Data appended to CSV successfully.")

    except Exception as e:
        print("Error fetching or appending data:", e)
        


In [37]:
samp_url = 'https://www.tuko.co.ke/business-economy/'
fetch_append_data(samp_url, 'business', 'business-economy')

Data appended to CSV successfully.


# Adding repeating time


In [40]:
def job():
    '''
    parameters:
    
        URL -- the tuko news link you are provided

        field -- the name of the csv file or field which data is being scrapped for
        
        label -- the value between the last forward slash in the url 
            
            /<label>/
    '''
    field = 'business'
    label = 'business-economy'
    fetch_append_data(samp_url, field, label)
    

In [41]:
schedule.every().hour.do(job)

# Run indefinitely
while True:
    schedule.run_pending()
    time.sleep(1)

KeyboardInterrupt: 