In [1]:
# %%
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta

# 1. Data parameters
# Yahoo Finance names currency pairs with an "=X" suffix.
ticker = "AUDUSD=X"

# Requested window: the 365 days ending now.
end_date = datetime.now()
start_date = end_date - timedelta(days=365)

# 2. Download the data with yfinance
print(f"در حال دانلود دیتای {ticker} از تاریخ {start_date.strftime('%Y-%m-%d')} تا {end_date.strftime('%Y-%m-%d')}...")

# download() returns the candles as a pandas DataFrame.
# auto_adjust is passed explicitly so the result does not depend on the
# installed yfinance version's default; True matches the columns this
# notebook's recorded output shows (Close/High/Low/Open/Volume, no Adj Close).
# NOTE(review): this presumably also silences the warning the original call
# emitted — confirm against the installed yfinance version.
audusd_data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=True)

# Fail loudly here instead of letting every later cell run on an empty frame.
if audusd_data.empty:
    raise ValueError(
        f"No data returned for {ticker} between "
        f"{start_date:%Y-%m-%d} and {end_date:%Y-%m-%d}."
    )

# 3. Show the first and last rows as a sanity check
print("\nچند ردیف اول دیتا:")
print(audusd_data.head())

print("\nچند ردیف آخر دیتا:")
print(audusd_data.tail())

# Total number of rows received
print(f"\nتعداد {len(audusd_data)} کندل روزانه دریافت شد.")

# Commit for data download branch - 2025-07-30

      

در حال دانلود دیتای AUDUSD=X از تاریخ 2024-07-31 تا 2025-07-31...


  audusd_data = yf.download(ticker, start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed


چند ردیف اول دیتا:
Price          Close      High       Low      Open   Volume
Ticker      AUDUSD=X  AUDUSD=X  AUDUSD=X  AUDUSD=X AUDUSD=X
Date                                                       
2024-07-31  0.654150  0.654220  0.648030  0.654150        0
2024-08-01  0.654700  0.655990  0.651470  0.654700        0
2024-08-02  0.649490  0.654760  0.648770  0.649490        0
2024-08-05  0.649722  0.651088  0.636549  0.649722        0
2024-08-06  0.651550  0.653971  0.647501  0.651550        0

چند ردیف آخر دیتا:
Price          Close      High       Low      Open   Volume
Ticker      AUDUSD=X  AUDUSD=X  AUDUSD=X  AUDUSD=X AUDUSD=X
Date                                                       
2025-07-25  0.659718  0.659900  0.655330  0.659561        0
2025-07-28  0.657298  0.658700  0.651630  0.657289        0
2025-07-29  0.652401  0.653000  0.649640  0.652061        0
2025-07-30  0.651322  0.652910  0.644920  0.651521        0
2025-07-31  0.643165  0.647794  0.642897  0.643832        0





In [2]:
# Inspect the column labels of the downloaded frame.
print(audusd_data.columns)

MultiIndex([( 'Close', 'AUDUSD=X'),
            (  'High', 'AUDUSD=X'),
            (   'Low', 'AUDUSD=X'),
            (  'Open', 'AUDUSD=X'),
            ('Volume', 'AUDUSD=X')],
           names=['Price', 'Ticker'])


In [3]:
# --- Normalize column labels, then drop incomplete candles ---

# The frame may carry multi-level column labels; when it does, keep only the
# top level so each label is a single value such as 'Open'.
has_multilevel_columns = isinstance(audusd_data.columns, pd.MultiIndex)
if has_multilevel_columns:
    print("MultiIndex detected. Flattening column names...")
    audusd_data.columns = audusd_data.columns.get_level_values(0)

# Lowercase every label so the rest of the notebook can use 'open', 'high', ...
audusd_data.columns = [str(label).lower() for label in audusd_data.columns]

# Show the resulting labels for confirmation.
print("\nFinal standardized column names:", audusd_data.columns, sep="\n")

# Remove any row that is missing one of the four price fields.
ohlc_columns = ['open', 'high', 'low', 'close']
audusd_data.dropna(subset=ohlc_columns, inplace=True)

print(
    f"\nRows after cleaning: {len(audusd_data)}",
    "✅ Missing data successfully removed.",
    sep="\n",
)

MultiIndex detected. Flattening column names...

Final standardized column names:
Index(['close', 'high', 'low', 'open', 'volume'], dtype='object')

Rows after cleaning: 259
✅ Missing data successfully removed.


In [4]:
# --- De-duplicate rows by index ---

print(f"Rows before checking for duplicates: {len(audusd_data)}")

# Flag every repeat of an index label after its first occurrence, then keep
# only the rows that were not flagged.
is_repeat = audusd_data.index.duplicated(keep='first')
audusd_data = audusd_data.loc[~is_repeat]

print(f"Rows after checking for duplicates: {len(audusd_data)}")
print("\n✅ Duplicate data check complete.")

Rows before checking for duplicates: 259
Rows after checking for duplicates: 259

✅ Duplicate data check complete.


In [5]:
# A bare expression on the last line of a cell is rendered as the cell's
# output, so this shows the first rows of the frame without print().
audusd_data.head()

Unnamed: 0_level_0,close,high,low,open,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-31,0.65415,0.65422,0.64803,0.65415,0
2024-08-01,0.6547,0.65599,0.65147,0.6547,0
2024-08-02,0.64949,0.65476,0.64877,0.64949,0
2024-08-05,0.649722,0.651088,0.636549,0.649722,0
2024-08-06,0.65155,0.653971,0.647501,0.65155,0


In [6]:
# Drop only the 'volume' column (it is 0 in every row shown in the previews).
# errors='ignore' keeps the cell re-runnable: once 'volume' is gone, a second
# run is a no-op instead of raising KeyError. Reassigning rather than using
# inplace=True leaves the frame produced by the previous cell unmodified.
audusd_data = audusd_data.drop(columns=['volume'], errors='ignore')
audusd_data.head()

Unnamed: 0_level_0,close,high,low,open
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-07-31,0.65415,0.65422,0.64803,0.65415
2024-08-01,0.6547,0.65599,0.65147,0.6547
2024-08-02,0.64949,0.65476,0.64877,0.64949
2024-08-05,0.649722,0.651088,0.636549,0.649722
2024-08-06,0.65155,0.653971,0.647501,0.65155


In [7]:
# Print a concise summary of the frame: index range, columns, non-null counts, and dtypes.
audusd_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 259 entries, 2024-07-31 to 2025-07-31
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   close   259 non-null    float64
 1   high    259 non-null    float64
 2   low     259 non-null    float64
 3   open    259 non-null    float64
dtypes: float64(4)
memory usage: 10.1 KB


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max) for each remaining column.
audusd_data.describe()

Unnamed: 0,close,high,low,open
count,259.0,259.0,259.0,259.0
mean,0.646969,0.649785,0.644186,0.646962
std,0.018929,0.018536,0.019069,0.01892
min,0.595529,0.60586,0.592308,0.5955
25%,0.631385,0.63395,0.628229,0.631329
50%,0.64753,0.65003,0.6444,0.647522
75%,0.6583,0.659981,0.65574,0.658298
max,0.691879,0.69411,0.69141,0.691879
