In [24]:
import yfinance as yf
import pandas as pd


In [25]:
# --- Step 1: Choose the instrument and the date window ---
# Reliance Industries as listed on India's National Stock Exchange (".NS" suffix).
ticker = "RELIANCE.NS"
# One-year window as ISO dates. In the recorded run the last row returned is
# 2025-07-28, i.e. the end date itself is not part of the result.
start_date, end_date = "2024-07-29", "2025-07-29"

In [26]:
# Announce which ticker and date window are about to be requested.
status_message = f"Attempting to download data for {ticker} from {start_date} to {end_date}...\n"
print(status_message)

Attempting to download data for RELIANCE.NS from 2024-07-29 to 2025-07-29...



In [28]:
# --- Step 2: Download the price history ---
# auto_adjust=True is passed through to yfinance (presumably so prices come back
# already adjusted for splits/dividends -- confirm against the yfinance docs).
try:
    data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)
except Exception as e:
    print(f"Error downloading data: {e}")
    print("Please check your internet connection or if the ticker symbol is correct.")
    # Re-raise instead of calling exit(): inside a notebook exit() asks the kernel
    # to shut down (discarding all state), and in a plain script it would end the
    # process with a success status. Propagating the error halts execution here
    # and keeps the original traceback.
    raise

[*********************100%***********************]  1 of 1 completed


In [31]:
# --- Step 3: Basic data validation ---
# An empty frame means nothing usable came back for this ticker/date window.
# Raise rather than call exit(): in a notebook exit() asks the kernel to shut
# down, whereas an exception stops execution at this cell and keeps the session
# alive for debugging.
if data.empty:
    raise ValueError(
        f"No data found for {ticker} in the specified date range. Please check dates or ticker."
    )

print("Data downloaded successfully and is not empty.")

Data downloaded successfully and is not empty.


In [32]:
# Turn the existing index into an ordinary column and fall back to a default
# integer index (reset_index returns a new frame by default).
# NOTE: running this cell a second time would move that integer index into the
# frame as an extra column, so run it once per download.
data = data.reset_index()

In [33]:
# Build lower snake_case names for every column label.
#   * Tuple labels (MultiIndex columns such as ('Close', 'RELIANCE.NS')) keep
#     only their first element, the metric name.
#   * Any other label (e.g. 'Date' after reset_index()) is converted with str().
# In both cases the text is lower-cased and spaces become underscores.
cleaned_column_names = [
    (col[0] if isinstance(col, tuple) else str(col)).lower().replace(" ", "_")
    for col in data.columns
]

In [34]:
# Apply the cleaned labels to the column axis; set_axis returns a relabelled
# frame, which is bound back to `data`.
data = data.set_axis(cleaned_column_names, axis="columns")

In [35]:
# Put 'date' in the first position, keeping every other column in its current order.
if 'date' in data.columns:
    cols = ['date', *(name for name in data.columns if name != 'date')]
    data = data.loc[:, cols]

In [36]:
# --- Preview the first rows of the cleaned DataFrame ---
# Heading and preview go out in a single call, separated by a newline.
print("\nCleaned DataFrame Head:", data.head(), sep="\n")


Cleaned DataFrame Head:
        date        close         high          low         open    volume
0 2024-07-29  1514.958252  1522.333246  1506.661460  1506.835795   7538550
1 2024-07-30  1508.031738  1519.841631  1504.892369  1511.893631   8228208
2 2024-07-31  1500.332886  1505.340791  1496.072322  1498.912657  10057346
3 2024-08-01  1510.174561  1512.865379  1499.211768  1506.885674  10764222
4 2024-08-02  1494.253418  1504.219593  1488.223955  1499.909271  10268248


In [37]:
# --- Structural overview: column names, non-null counts, dtypes, memory ---
info_heading = "\nDataFrame Info:"
print(info_heading)
# info() prints its report itself, so it is not wrapped in print().
data.info()


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    250 non-null    datetime64[ns]
 1   close   250 non-null    float64       
 2   high    250 non-null    float64       
 3   low     250 non-null    float64       
 4   open    250 non-null    float64       
 5   volume  250 non-null    int64         
dtypes: datetime64[ns](1), float64(4), int64(1)
memory usage: 11.8 KB


In [38]:
# --- Summary statistics for every column ---
# Heading and statistics table go out in a single call, separated by a newline.
print("\nDataFrame Description:", data.describe(), sep="\n")


DataFrame Description:
                             date        close         high          low  \
count                         250   250.000000   250.000000   250.000000   
mean   2025-01-26 03:44:38.400000  1355.371805  1367.701548  1344.515035   
min           2024-07-29 00:00:00  1161.900024  1171.250000  1114.849976   
25%           2024-10-25 18:00:00  1253.925018  1269.950012  1243.862488   
50%           2025-01-25 12:00:00  1343.625000  1360.737488  1334.000000   
75%           2025-04-29 18:00:00  1455.313324  1468.454379  1445.731262   
max           2025-07-28 00:00:00  1541.500000  1551.000000  1530.199951   
std                           NaN   106.492419   106.714501   107.466501   

              open        volume  
count   250.000000  2.500000e+02  
mean   1356.301661  1.307233e+07  
min    1132.199951  0.000000e+00  
25%    1258.225006  8.731582e+06  
50%    1340.000000  1.126159e+07  
75%    1456.702260  1.593660e+07  
max    1536.699951  4.162073e+07  
std     107

In [40]:
# --- Save to CSV ---
# The file name is defined once so the path that is written and the path that
# is reported to the user cannot drift apart.
output_csv = "reliance_stock.csv"
# index=False leaves the integer row index out of the file; 'date' is already a column.
data.to_csv(output_csv, index=False)
print(f"\n✅ Data saved to '{output_csv}'")


✅ Data saved to 'reliance_stock.csv'
