In [1]:
import ipynb.fs.full.dict_maker as dictionary
import requests
import feedparser
import pandas 
from datetime import datetime

In [2]:
class livemint_reader:
    """Reader that turns a Livemint RSS feed into a pandas DataFrame."""

    @staticmethod
    def read_feed(url:str) -> pandas.DataFrame:
        """Download and parse the Livemint feed at ``url``.

        Args:
            url: Address of the RSS feed to read.

        Returns:
            A DataFrame with the columns ``title``, ``link``, ``link_id``,
            ``link_date`` and ``link_summary``; a leading ``site_name`` column
            (always ``'Livemint'``) is added when the feed has at least one
            entry. If reading or parsing fails, the error is printed and an
            empty DataFrame with the five data columns is returned.
        """
        columns = ('title', 'link', 'link_id', 'link_date', 'link_summary')
        try:
            print(f'Reading feed from {url} for Livemint...')
            feed = feedparser.parse(url)

            # feedparser does not raise on malformed XML; it flags the result
            # with `bozo` and stores the underlying error in `bozo_exception`.
            # Entries may still be usable, so only report and carry on.
            if getattr(feed, 'bozo', False):
                print(f"Parse error: {getattr(feed, 'bozo_exception', None)}")

            collected = {name: [] for name in columns}
            for entry in feed.entries:
                collected['title'].append(entry.title)
                collected['link'].append(entry.link)
                # Keep the first 14 of the last 19 characters of the link;
                # presumably the numeric article id in front of ".html" - TODO confirm.
                collected['link_id'].append(entry.link[-19:][:14])
                # Drop the first 5 characters (presumably the "Mon, " weekday
                # prefix) and parse e.g. "19 May 2025 11:53:01 +0530".
                collected['link_date'].append(
                    datetime.strptime(entry.published[5:], '%d %b %Y %H:%M:%S %z')
                )
                collected['link_summary'].append(entry.summary)

            df = pandas.DataFrame(data=collected)
            if not df.empty:          # feed can be empty sometimes
                df.insert(0, 'site_name', 'Livemint')
            return df

        except Exception as e:
            # A single handler on purpose: the feedparser module does not expose
            # ExpatError, NotFeedException or ParseError, so naming them in
            # `except` clauses raised AttributeError as soon as any error had
            # to be matched, hiding the real problem.
            print(f"An unexpected error occurred: {e}")
            return pandas.DataFrame(data={name: [] for name in columns})

In [3]:
class tradebrains_reader:
    """Reader that turns a Tradebrains RSS feed into a pandas DataFrame."""

    @staticmethod
    def read_feed(url:str) -> pandas.DataFrame:
        """Download and parse the Tradebrains feed at ``url``.

        Args:
            url: Address of the RSS feed to read.

        Returns:
            A DataFrame with the columns ``title``, ``link``, ``link_id``,
            ``link_date`` and ``link_summary``; a leading ``site_name`` column
            (always ``'Tradebrains'``) is added when the feed has at least one
            entry. If reading or parsing fails, the error is printed and an
            empty DataFrame with the five data columns is returned.
        """
        columns = ('title', 'link', 'link_id', 'link_date', 'link_summary')
        try:
            print(f'Reading feed from {url} Tradebrains...')
            feed = feedparser.parse(url)

            # feedparser does not raise on malformed XML; it flags the result
            # with `bozo` and stores the underlying error in `bozo_exception`.
            # Entries may still be usable, so only report and carry on.
            if getattr(feed, 'bozo', False):
                print(f"Parse error: {getattr(feed, 'bozo_exception', None)}")

            collected = {name: [] for name in columns}
            for entry in feed.entries:
                collected['title'].append(entry.title)
                collected['link'].append(entry.link)
                # Last 5 characters of the guid; presumably the numeric post id
                # at the end of the guid URL - TODO confirm.
                collected['link_id'].append(entry.guid[-5:])
                # Drop the first 5 characters (presumably the "Mon, " weekday
                # prefix) and parse e.g. "19 May 2025 11:53:01 +0530".
                collected['link_date'].append(
                    datetime.strptime(entry.published[5:], '%d %b %Y %H:%M:%S %z')
                )
                # Could be very long, so shortened to 1000 chars to fit in the db.
                collected['link_summary'].append(entry.summary[:1000])

            df = pandas.DataFrame(data=collected)
            if not df.empty:          # feed can be empty sometimes
                df.insert(0, 'site_name', 'Tradebrains')
            return df

        except Exception as e:
            # A single handler on purpose: the feedparser module does not expose
            # ExpatError, NotFeedException or ParseError, so naming them in
            # `except` clauses raised AttributeError as soon as any error had
            # to be matched, hiding the real problem.
            print(f"An unexpected error occurred: {e}")
            return pandas.DataFrame(data={name: [] for name in columns})

In [4]:
class economic_times_reader:
    """Reader that turns an Economic Times RSS feed into a pandas DataFrame."""

    @staticmethod
    def read_feed(url:str) -> pandas.DataFrame:
        """Download and parse the Economic Times feed at ``url``.

        Entries may lack ``guid``, ``published`` or ``summary``; missing ids and
        summaries are stored as the string ``'None'`` and a missing publication
        time falls back to the current local time (timezone-aware).

        Args:
            url: Address of the RSS feed to read. The Gold commodity feed URL
                is special-cased because its timestamps use a different format.

        Returns:
            A DataFrame with the columns ``title``, ``link``, ``link_id``,
            ``link_date`` and ``link_summary``; a leading ``site_name`` column
            (always ``'Economic Times'``) is added when the feed has at least
            one entry. If reading or parsing fails, the error is printed and an
            empty DataFrame with the five data columns is returned.
        """
        columns = ('title', 'link', 'link_id', 'link_date', 'link_summary')
        try:
            print(f'Reading feed from {url} Economic Times...')
            feed = feedparser.parse(url)

            # feedparser does not raise on malformed XML; it flags the result
            # with `bozo` and stores the underlying error in `bozo_exception`.
            # Entries may still be usable, so only report and carry on.
            if getattr(feed, 'bozo', False):
                print(f"Parse error: {getattr(feed, 'bozo_exception', None)}")

            # Special case for the Gold RSS on ET because of its timestamp type.
            if url == 'https://economictimes.indiatimes.com/rsssymbolfeeds/commodityname-Gold.cms':
                time_format = '%Y-%m-%dT%H:%M:%S%z'
                start_idx = 0
            else:
                # Skip the first 5 characters (presumably the "Mon, " weekday prefix).
                time_format = '%d %b %Y %H:%M:%S %z'
                start_idx = 5

            collected = {name: [] for name in columns}
            for entry in feed.entries:
                collected['title'].append(entry.title)
                collected['link'].append(entry.link)

                # guid may not exist. When present keep the first 9 of its last
                # 13 characters (presumably the article id before ".cms" - TODO confirm).
                collected['link_id'].append(
                    entry.guid[-13:][:9] if 'guid' in entry else 'None'
                )

                # published may not exist. The previous fallback
                # `datetime.date.today()` always failed because `datetime` here
                # is the class, not the module; use an aware "now" so the value
                # is the same kind of object as the parsed timestamps.
                if 'published' in entry:
                    link_date = datetime.strptime(entry.published[start_idx:], time_format)
                else:
                    link_date = datetime.now().astimezone()
                collected['link_date'].append(link_date)

                # summary may not exist; when present it could be very long,
                # so it is truncated to 1022 chars to fit in the db.
                collected['link_summary'].append(
                    entry.summary[:1022] if 'summary' in entry else 'None'
                )

            df = pandas.DataFrame(data=collected)
            if not df.empty:          # feed can be empty sometimes
                df.insert(0, 'site_name', 'Economic Times')
            return df

        except Exception as e:
            # A single handler on purpose: the feedparser module does not expose
            # ExpatError, NotFeedException or ParseError, so naming them in
            # `except` clauses raised AttributeError as soon as any error had
            # to be matched, hiding the real problem.
            print(f"An unexpected error occurred: {e}")
            return pandas.DataFrame(data={name: [] for name in columns})

In [5]:
class hindubusinessline_times_reader:
    """Reader that turns a Hindu Business Line RSS feed into a pandas DataFrame."""

    @staticmethod
    def read_feed(url:str) -> pandas.DataFrame:
        """Download and parse the Hindu Business Line feed at ``url``.

        Entries may lack ``guid``, ``published`` or ``summary``; missing ids and
        summaries are stored as the string ``'None'`` and a missing publication
        time falls back to the current local time (timezone-aware).

        Args:
            url: Address of the RSS feed to read.

        Returns:
            A DataFrame with the columns ``title``, ``link``, ``link_id``,
            ``link_date`` and ``link_summary``; a leading ``site_name`` column
            (always ``'Hindu Business Line'``) is added when the feed has at
            least one entry. If reading or parsing fails, the error is printed
            and an empty DataFrame with the five data columns is returned.
        """
        columns = ('title', 'link', 'link_id', 'link_date', 'link_summary')
        time_format = '%d %b %Y %H:%M:%S %z'
        try:
            print(f'Reading feed from {url} Hindu Business Line...')
            feed = feedparser.parse(url)

            # feedparser does not raise on malformed XML; it flags the result
            # with `bozo` and stores the underlying error in `bozo_exception`.
            # Entries may still be usable, so only report and carry on.
            if getattr(feed, 'bozo', False):
                print(f"Parse error: {getattr(feed, 'bozo_exception', None)}")

            collected = {name: [] for name in columns}
            for entry in feed.entries:
                collected['title'].append(entry.title)
                collected['link'].append(entry.link)

                # guid may not exist; when present it is used unmodified as the id.
                collected['link_id'].append(entry.guid if 'guid' in entry else 'None')

                # published may not exist. The previous fallback
                # `datetime.date.today()` always failed because `datetime` here
                # is the class, not the module; use an aware "now" so the value
                # is the same kind of object as the parsed timestamps.
                if 'published' in entry:
                    # Skip the first 5 characters (presumably the "Mon, " weekday prefix).
                    link_date = datetime.strptime(entry.published[5:], time_format)
                else:
                    link_date = datetime.now().astimezone()
                collected['link_date'].append(link_date)

                # summary may not exist; when present it could be very long,
                # so it is truncated to 1022 chars to fit in the db.
                collected['link_summary'].append(
                    entry.summary[:1022] if 'summary' in entry else 'None'
                )

            df = pandas.DataFrame(data=collected)
            if not df.empty:          # feed can be empty sometimes
                df.insert(0, 'site_name', 'Hindu Business Line')
            return df

        except Exception as e:
            # A single handler on purpose: the feedparser module does not expose
            # ExpatError, NotFeedException or ParseError, so naming them in
            # `except` clauses raised AttributeError as soon as any error had
            # to be matched, hiding the real problem.
            print(f"An unexpected error occurred: {e}")
            return pandas.DataFrame(data={name: [] for name in columns})

In [6]:
class feed_reader(livemint_reader, tradebrains_reader, economic_times_reader, hindubusinessline_times_reader):
    """Single entry point that routes a site name to its dedicated reader class."""

    @classmethod
    def read_feed(cls, site: str, url: str) -> pandas.DataFrame:
        """Read the feed at ``url`` with the reader registered for ``site``.

        Args:
            site: Site label; one of ``'Livemint'``, ``'Tradebrains'``,
                ``'Economic Times'`` or ``'Hindu Business Line'``.
            url: Address of the RSS feed, passed through to the site reader.

        Returns:
            Whatever the selected reader's ``read_feed`` returns, or ``None``
            when ``site`` matches none of the known labels.
        """
        # Checked in this order; the first label equal to `site` wins.
        readers_by_site = (
            ('Livemint', livemint_reader),
            ('Tradebrains', tradebrains_reader),
            ('Economic Times', economic_times_reader),
            ('Hindu Business Line', hindubusinessline_times_reader),
        )
        for label, reader in readers_by_site:
            if site == label:
                return reader.read_feed(url)
        return None

In [7]:
# One line tester
#url = dictionary.dict_to_json.get_value_from_file('Markets')
#df = livemint_reader.read_feed(url)
#print(df)

In [8]:
#obj = datetime.strptime('19 May 2025 11:53:01 +0530', '%d %B %Y %H:%M:%S %z')
#print(obj)

In [9]:
# One line tester
#url = dictionary.dict_to_json.get_value_from_file('Markets')
#df = tradebrains_reader.read_feed('https://tradebrains.in/feed/')
#for idx, row in df.iterrows():
#    print(len(row['link_summary']))

In [10]:
# One line tester
#url = dictionary.dict_to_json.get_value_from_file('Economic Times', 'Markets')
#df = economic_times_reader.read_feed(url)
#df.head()
#for idx, row in df.iterrows():
#    print(len(row['link_summary']))

In [11]:
# One line tester
#url = dictionary.dict_to_json.get_value_from_file('Hindu Business Line', 'Top Gainers/Top Losers')
#df = hindubusinessline_times_reader.read_feed(url)
#df.head()
#for idx, row in df.iterrows():
#    print(row['link_summary'])