In [1]:
import pandas as pd
import yfinance as yf
from datetime import datetime

# Define the start and end dates for the data
start_date = '2019-01-01'
end_date = '2023-12-31'

# Define the frequency for the data
frequency = '1y'  # Options: '1d' for daily, '1mo' for monthly, '1y' for yearly

# Read the CSV file to get the list of stock symbols
file_path = "C:/Users/amirx/OneDrive/Desktop/ticker.csv"  # Update the file path if needed
try:
    symbols_df = pd.read_csv(file_path)
    print(symbols_df.head())  # Inspect the first few rows
    print(symbols_df.columns)  # Print the column names
    
    # Strip whitespace from column names
    symbols_df.columns = symbols_df.columns.str.strip()
    
    # Check if 'Symbol' column is present
    if 'Symbol' not in symbols_df.columns:
        raise ValueError("CSV file must contain 'Symbol' column.")
except Exception as e:
    print(f"Error reading CSV file: {e}")
    exit()

# Initialize a list to hold all data
data_frames = []

# Loop through each symbol in the CSV
for index, row in symbols_df.iterrows():
    symbol = row['Symbol']
    
    try:
        # Fetch stock data from Yahoo Finance
        df = yf.download(symbol+".KA", start=start_date, end=end_date)
        
        if df.empty:
            print(f"No data found for {symbol}.")
            continue
        
        # Reset index to convert the date index to a column
        df.reset_index(inplace=True)
        
        # Resample the data based on the specified frequency
        if frequency == '1mo':
            df.set_index('Date', inplace=True)
            df = df.resample('M').ffill()  # Monthly frequency
            df.reset_index(inplace=True)
        elif frequency == '1y':
            df.set_index('Date', inplace=True)
            df = df.resample('A').ffill()  # Yearly frequency (using 'A' instead of 'Y')
            df.reset_index(inplace=True)
        
        # Add the symbol column to the dataframe
        df['Symbol'] = symbol
        
        # Append the data frame to the list
        data_frames.append(df)
        print(f"Data for {symbol} downloaded successfully.")
        
    except Exception as e:
        print(f"Failed to download data for {symbol}: {e}")

# Combine all the data into one DataFrame
if data_frames:
    combined_data = pd.concat(data_frames, ignore_index=True)

    # Check for duplicate columns
    combined_data = combined_data.loc[:, ~combined_data.columns.duplicated()]

    # Reorder the columns for better readability
    combined_data = combined_data[['Date', 'Symbol', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]

    # Display the combined dataframe
    print(combined_data.head())
    
    # Save the combined data to a new CSV file
    output_file_path = "C:/Users/amirx/OneDrive/Desktop/combined_data.csv"  # Update the file path as needed
    combined_data.to_csv(output_file_path, index=False)
    print(f"Combined data saved to {output_file_path}.")
else:
    print("No data available.")


  Symbol
0    786
1   EMCO
2   ABOT
Index(['Symbol'], dtype='object')


[*********************100%***********************]  1 of 1 completed
  df = df.resample('A').ffill()  # Yearly frequency (using 'A' instead of 'Y')


Data for 786 downloaded successfully.


[*********************100%***********************]  1 of 1 completed
  df = df.resample('A').ffill()  # Yearly frequency (using 'A' instead of 'Y')


Data for EMCO downloaded successfully.


[*********************100%***********************]  1 of 1 completed

Data for ABOT downloaded successfully.
        Date Symbol       Open   High        Low  Close  Adj Close  Volume
0 2019-12-31    786  25.120001  25.50  25.120001  25.40      25.40    1500
1 2020-12-31    786  12.000000  12.00  11.750000  11.81      11.81   21000
2 2021-12-31    786   6.550000   6.70   6.450000   6.45       6.45   10500
3 2022-12-31    786   4.680000   4.99   4.680000   4.99       4.99  167500
4 2023-12-31    786   5.800000   5.80   5.130000   5.50       5.50    7000
Combined data saved to C:/Users/amirx/OneDrive/Desktop/combined_data.csv.



  df = df.resample('A').ffill()  # Yearly frequency (using 'A' instead of 'Y')


In [2]:
combined_data

Unnamed: 0,Date,Symbol,Open,High,Low,Close,Adj Close,Volume
0,2019-12-31,786,25.120001,25.5,25.120001,25.4,25.4,1500
1,2020-12-31,786,12.0,12.0,11.75,11.81,11.81,21000
2,2021-12-31,786,6.55,6.7,6.45,6.45,6.45,10500
3,2022-12-31,786,4.68,4.99,4.68,4.99,4.99,167500
4,2023-12-31,786,5.8,5.8,5.13,5.5,5.5,7000
5,2019-12-31,EMCO,25.0,25.0,25.0,25.0,23.419769,0
6,2020-12-31,EMCO,31.709999,31.709999,29.0,29.709999,27.832052,18500
7,2021-12-31,EMCO,29.5,29.5,29.5,29.5,28.29253,0
8,2022-12-31,EMCO,23.85,23.85,23.85,23.85,23.373001,0
9,2023-12-31,EMCO,37.02,38.0,37.009998,37.009998,37.009998,2500
