In [1]:
import pandas as pd
import yfinance as yf
from yahooquery import search
import datetime
import os
import re


In [2]:
## First step: clean the dataset
def clean_name(name):
    """Normalise a company name into a lowercase ASCII lookup key.

    Non-ASCII characters (e.g. Chinese text), punctuation and spaces are
    removed and the remainder is lowercased, so ``"Apple Inc."`` becomes
    ``"appleinc"``. Letters, digits and underscores are kept.

    Parameters
    ----------
    name : str or None
        Raw company name. ``None`` and float NaN (how pandas represents a
        missing cell) give an empty string; any other non-string value is
        converted with ``str`` before cleaning.

    Returns
    -------
    str
        The cleaned key; empty when nothing usable remains.
    """
    # Missing cells arrive through Series.apply as None / float NaN, and
    # re.sub raises TypeError on them. NaN is the only float unequal to itself.
    if name is None or (isinstance(name, float) and name != name):
        return ''
    name = str(name)
    name = re.sub(r'[^\x00-\x7F]+', '', name)  # Drop non-ASCII characters
    name = re.sub(r'[^\w\s]', '', name)  # Drop punctuation / special symbols
    name = name.replace(' ', '')  # Drop spaces between words
    # Lowercase, then trim any whitespace other than spaces left at the ends
    return name.lower().strip()

mydata = pd.read_csv('mydata.csv')
# Step 1: Rename column titles to English. pandas raises ValueError here
# unless the file has exactly these nine columns.
mydata.columns = ["Company_Name", "Transaction_Code", "RIC", "Market_Value_Million",
                  "Total_Revenue_Million", "P_E_Ratio", "Yield_Percent", "Sector", "GTI_Score"]

# Step 2: Add a normalised lookup key derived from 'Company_Name'
mydata['cleaned_transaction_name'] = mydata['Company_Name'].apply(clean_name)

# Step 3: Build 'company_ticker' as "<Transaction_Code>.<suffix>", where the
# suffix is the last period-separated piece of 'RIC' (the whole RIC when it
# contains no period).
mydata['company_ticker'] = mydata['Transaction_Code'] + '.' + mydata['RIC'].str.split('.').str[-1]

# Name -> ticker table that later cells extend and save. The earlier
# intermediate selection of ['cleaned_transaction_name', 'Transaction_Code',
# 'RIC'] was overwritten before use and has been removed.
new_data = mydata[['cleaned_transaction_name', 'company_ticker']]


In [3]:
# Extra company names, grouped by sector, that are resolved to tickers with
# get_ticker_symbol and appended to the name -> ticker table built from
# mydata.csv. Short and long forms of a company name are separate entries.
# NOTE(review): 'CSE Global' and 'Nanofilm' each appear twice, and 'apple' /
# 'Apple' differ only by case; each entry triggers its own lookup and adds
# its own row. Confirm whether the repeats are intentional.
company_list = [
'apple',
## Software and Services
'Captii',
'CSE Global',
'V2Y Corp',
'SinoCloud Grp',
'Microsoft',
'Amazon',
'Captii Limited',
'CSE Global',
'V2Y Corporation Ltd',
'SinoCloud Group Ltd',
## Technology Hardware and Equipment
'Addvalue Tech',
'Nanofilm',
'Venture',
'Apple',
'Addvalue Technologies Ltd',
'Razer',
'Nanofilm',
'Venture Corporation Limited',
## Semiconductors and Semiconductor Equipment
'AdvancedSystems',
'AEM SGD',
'Asia Vets',
'ASTI',
'UMS',
# NOTE(review): "Adanvanced" looks like a typo for "Advanced"; the recorded
# run found no ticker for this entry. Confirm before changing it.
'Adanvanced Systems Automation Limited',
'AEM Holdings Ltd',
'Asia Vets Holdings Ltd',
'ASTI Holdings Ltd',
'UMS Holdings Ltd',
## Information Technology
'Audience',
'Lazada',
'Grab',
'Sea Ltd',
'Carousell',
'Audience Analytics Limited',
## Engineering Services
'Singtel',
'M1 Singapore',
'ST Engineering',
'GSS Energy'
]


def get_ticker_symbol(company_name):
    """Look up a ticker symbol for ``company_name`` with yahooquery's ``search``.

    Returns the ``symbol`` of the first entry in the response's ``quotes``
    list, or None when the response has no ``quotes`` key or that list is
    empty.
    """
    response = search(company_name)

    # Guard clause: nothing to return without at least one quote.
    if 'quotes' not in response or len(response['quotes']) == 0:
        return None

    # Only the top-ranked match is used.
    best_match = response['quotes'][0]
    return best_match['symbol']


## Add more data
# Look up a ticker for every extra company and append the results to new_data.
# The name is stored in its clean_name() form: get_company_ticker compares
# clean_name(input) against 'cleaned_transaction_name', so a raw name such as
# 'CSE Global' could never be matched.
extra_rows = []
for company in company_list:
    ticker = get_ticker_symbol(company)
    extra_rows.append({'cleaned_transaction_name': clean_name(company),
                       'company_ticker': ticker})
    # Report the table size as it will be once this row is appended
    print(company, ticker, (len(new_data) + len(extra_rows), new_data.shape[1]))

# Append all rows in one concat instead of re-copying the frame per company
new_data = pd.concat(
    [new_data,
     pd.DataFrame(extra_rows, columns=['cleaned_transaction_name', 'company_ticker'])],
    ignore_index=True,
)

new_data.to_csv('company_ticker_mapping.csv')


apple AAPL (628, 2)
Captii AWV.SI (629, 2)
CSE Global CSYJF (630, 2)
V2Y Corp V2Y.SI (631, 2)
SinoCloud Grp LYY.SI (632, 2)
Microsoft MSFT (633, 2)
Amazon AMZN (634, 2)
Captii Limited AWV.SI (635, 2)
CSE Global CSYJF (636, 2)
V2Y Corporation Ltd V2Y.SI (637, 2)
SinoCloud Group Ltd None (638, 2)
Addvalue Tech A31.SI (639, 2)
Nanofilm MZH.SI (640, 2)
Venture TPVG (641, 2)
Apple AAPL (642, 2)
Addvalue Technologies Ltd A31.SI (643, 2)
Razer None (644, 2)
Nanofilm MZH.SI (645, 2)
Venture Corporation Limited VENTURE19.BK (646, 2)
AdvancedSystems WJ9.SI (647, 2)
AEM SGD AWX.SI (648, 2)
Asia Vets 5RE.SI (649, 2)
ASTI ASTI (650, 2)
UMS 558.SI (651, 2)
Adanvanced Systems Automation Limited None (652, 2)
AEM Holdings Ltd AEMFF (653, 2)
Asia Vets Holdings Ltd 5RE.SI (654, 2)
ASTI Holdings Ltd None (655, 2)
UMS Holdings Ltd None (656, 2)
Audience 1AZ.SI (657, 2)
Lazada None (658, 2)
Grab GRAB (659, 2)
Sea Ltd C7S.F (660, 2)
Carousell None (661, 2)
Audience Analytics Limited 1AZ.SI (662, 2)
Singtel 

In [4]:
def get_stock_data(company_name):
    """Download a company's daily prices and save them under ``StockPrice/``.

    The name is resolved to a ticker with ``get_company_ticker``. Prices from
    2020-01-01 up to today's date are requested with ``yf.download`` and
    reduced to a ``Date`` / ``Adj Close`` table, which is written to
    ``StockPrice/<company_name>_stock_data.csv``. When the download has no
    'Adj Close' column, 'Close' is used and renamed to 'Adj Close'. Finally
    the latest price from a one-day ``Ticker.history`` call is printed.

    Parameters
    ----------
    company_name : str
        Company name as accepted by ``get_company_ticker``; also used
        verbatim in the output file name.

    Returns
    -------
    None
        Progress and failures are reported with ``print`` only.
    """
    # Map company name to ticker symbol
    ticker_symbol = get_company_ticker(company_name)
    # A blank ticker cell in the mapping CSV is read back as NaN, which is
    # truthy, so it must be rejected explicitly alongside None and ''.
    if ticker_symbol is None or pd.isna(ticker_symbol) or not ticker_symbol:
        print(f"Can't get ticker symbol for {company_name}")
        return

    # Fetch data from 2020-01-01 to today
    start_date = '2020-01-01'
    end_date = datetime.date.today().strftime('%Y-%m-%d')
    data = yf.download(ticker_symbol, start=start_date, end=end_date)

    # Check if data is available
    if data is None or data.empty:
        print(f"No data available for {company_name} ({ticker_symbol})")
        return

    # yf.download can return (field, ticker) MultiIndex columns even for one
    # symbol; keep only the field level so the CSV has a single header row.
    if isinstance(data.columns, pd.MultiIndex):
        data = data.copy()
        data.columns = data.columns.get_level_values(0)

    # Prefer 'Adj Close'; fall back to 'Close'
    if 'Adj Close' in data.columns:
        price_column = 'Adj Close'
    elif 'Close' in data.columns:
        price_column = 'Close'
    else:
        print(f"No 'Adj Close' or 'Close' data available for {company_name} ({ticker_symbol})")
        return

    # Build a new frame rather than renaming a slice of `data` in place
    prices = (
        data[[price_column]]
        .rename(columns={price_column: 'Adj Close'})
        .reset_index()
    )
    prices['Date'] = prices['Date'].dt.strftime('%Y-%m-%d')

    # Create folder if it doesn't exist
    folder_path = 'StockPrice'
    os.makedirs(folder_path, exist_ok=True)

    # Save data as CSV with company name in filename
    csv_filename = f"{folder_path}/{company_name}_stock_data.csv"
    prices.to_csv(csv_filename, index=False)
    print(f"Data saved to {csv_filename}")

    # Retrieve the most recent price. An empty history (e.g. no trading data
    # for the period) would make .iloc[-1] raise IndexError, so check first.
    history = yf.Ticker(ticker_symbol).history(period='1d')
    current_price = None
    if history is not None and not history.empty:
        for column in ('Adj Close', 'Close'):
            if column in history.columns:
                current_price = history[column].iloc[-1]
                break

    if current_price is not None:
        print(f"{company_name} ({ticker_symbol}) most recent Adj Close: ${current_price}")
    else:
        print(f"No recent 'Adj Close' or 'Close' price available for {company_name} ({ticker_symbol})")

def get_company_ticker(company_name):
    """Return the ticker stored for ``company_name`` in ``company_ticker_mapping.csv``.

    The name is normalised with ``clean_name`` and compared for exact equality
    with the ``cleaned_transaction_name`` column of the mapping file, which is
    re-read on every call.

    Parameters
    ----------
    company_name : str
        Company name in any spelling that ``clean_name`` reduces to a stored key.

    Returns
    -------
    str or None
        The first non-missing ``company_ticker`` among the matching rows, or
        None when no row matches or every matching row has an empty ticker.
    """
    company_ticker_map = pd.read_csv('company_ticker_mapping.csv')
    cleaned_input_name = clean_name(company_name)
    is_match = company_ticker_map['cleaned_transaction_name'] == cleaned_input_name

    # Rows saved with a None ticker are read back as NaN. NaN is truthy, so
    # returning it would slip past callers' `if not ticker` checks; skip such
    # rows and fall through to None when nothing usable is left.
    tickers = company_ticker_map.loc[is_match, 'company_ticker'].dropna()
    if tickers.empty:
        return None
    return tickers.iloc[0]

In [5]:

# Example usage: resolve 'apple' through the saved name -> ticker mapping,
# write its price history to StockPrice/apple_stock_data.csv and print the
# most recent price.
company_name = 'apple'
get_stock_data(company_name)

[*********************100%***********************]  1 of 1 completed

Data saved to StockPrice/apple_stock_data.csv
apple (AAPL) most recent Adj Close: $226.9600067138672



