# Data ADS

In [4]:
import numpy as np
import pandas as pd
import os
from datetime import datetime

import gspread
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials
from google.oauth2.service_account import Credentials

In [5]:
# 1. Đọc danh sách ASIN từ file txt
with open('ASIN.txt', 'r') as f:
    asin_list = [line.strip() for line in f.readlines()]

# 2. Tạo list chứa các file .xlsx trong thư mục Data
folder_path = 'Data_ADS'
files = [f for f in os.listdir(folder_path) if f.endswith('.xlsx')]

# 3. Đọc từng file, xử lý rồi append vào list
all_data = []

for file in files:
    # Trích xuất ngày từ tên file
    date_str = file.split('_')[1].replace('.xlsx', '')  # "20250713"
    date_obj = datetime.strptime(date_str, "%Y%m%d")     # datetime object
    df = pd.read_excel(os.path.join(folder_path, file))
    
    # Thêm cột date định dạng chuẩn
    df['date'] = date_obj
    
    # Tạo cột ASIN từ 10 ký tự đầu tiên của cột Portfolio
    df['ASIN'] = df['Portfolio'].astype(str).str[:10]
    
    all_data.append(df)

# 4. Gộp tất cả thành một DataFrame
merged_df = pd.concat(all_data, ignore_index=True)

# 5. Lọc chỉ giữ lại các dòng có ASIN nằm trong danh sách
filtered_df = merged_df[merged_df['ASIN'].isin(asin_list)]

#6. Di chuuyển cột ASIN lên đầu, sau Campaign type	
filtered_df = filtered_df[['Campaign type', 'ASIN'] + [col for col in filtered_df.columns if col not in ['Campaign type', 'ASIN']]]

# ✅ Kết quả cuối cùng
filtered_df.head()

Unnamed: 0,Campaign type,ASIN,Campaign,Status,Country,Profile,Portfolio,Target type,Daily Budget,Current Budget,...,CPA,Sales Same SKU,Sales Other SKU,Orders Same SKU,Orders Other SKU,Units Same SKU,Units Other SKU,date,Top-of-search IS,Avg.time in Budget
10,sponsoredProducts,B092HNBJT6,SP_B092HNBJT6_TUMBLER 20_COWORKER BLACK_Manual...,Delivering,US,NewEleven,B092HNBJT6_TUMBLER 20_COWORKER BLACK_TRINH,manual,$15.00,$0.00,...,2.85,39.96,23.97,2,2,2,2,2025-08-22,,
39,sponsoredProducts,B08R8R2LQF,SP_B08R8R2LQF_Tumbler 20_May The Forties Be Wi...,Delivering,US,NewEleven,B08R8R2LQF_TUMBLER 20_MAY THE FORTIES BE WITH ...,manual,$20.00,$0.00,...,5.94,68.94,0.0,3,--,3,--,2025-08-22,,
46,sponsoredProducts,B08R8R2LQF,B08R8R2LQF_20oz_May the forties black_40th bir...,Delivering,US,NewEleven,B08R8R2LQF_TUMBLER 20_MAY THE FORTIES BE WITH ...,manual,$10.00,$0.00,...,0.0,0.0,0.0,0,--,0,--,2025-08-22,,
49,sponsoredProducts,B0DPM5JXP7,B0DPM5JXP7_Orna_New home 2025 glass_new home o...,Delivering,US,NewEleven,B0DPM5JXP7_ORNAMENT_NEW HOME NEW BEGINNINGS NE...,manual,$10.00,$0.00,...,6.75,9.99,0.0,1,--,1,--,2025-08-22,,
65,sponsoredProducts,B0DH87FYNN,SP_B0DH87FYNN_Tumbler 20_Vintage 1965 Weird Be...,Delivering,US,NewEleven,B0DH87FYNN_TUMBLER 20_VINTAGE 1965 WEIRD BEING...,manual,$14.00,$0.00,...,5.6,19.98,0.0,1,--,1,--,2025-08-22,,


In [6]:
scopes = ["https://www.googleapis.com/auth/spreadsheets", 
          "https://www.googleapis.com/auth/drive"]
creds = Credentials.from_service_account_file("/Users/thuytrinh/Downloads/new_credential.json", scopes=scopes)
client = gspread.authorize(creds)

# Mở Google Sheet
sheet_id = "1n2Ug3joJwsuuSsrx6T92Zi_dihht7KzIebRTGpKnehc"

# Mở file Google Sheet (Spreadsheet object)
spreadsheet = client.open_by_key(sheet_id)
sheet1 = client.open_by_key(sheet_id).worksheet("DATA_XN_ADS")

sheet1.clear()  # Xoá nội dung cũ trong sheet
set_with_dataframe(sheet1, filtered_df)

# Total GMV

In [7]:
import numpy as np
import pandas as pd
import os
from datetime import datetime
import re

import gspread
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials
from google.oauth2.service_account import Credentials


In [8]:
# 1. Đọc danh sách ASIN từ file txt
with open('ASIN.txt', 'r') as f:
    asin_list = [line.strip() for line in f.readlines()]

# 2. Tạo list chứa các file .csv trong thư mục Data
folder_path = 'Data_Total'
files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]

# 3. Đọc từng file, xử lý rồi append vào list
all_data = []

for file in files:
    # ✅ Dùng regex để tìm ngày theo pattern dd_mm_yyyy
    match = re.search(r'(\d{2}_\d{2}_\d{4})', file)
    
    if match:
        date_str = match.group(1)  # ví dụ: "20_07_2025"
        date_obj = datetime.strptime(date_str, "%d_%m_%Y")  # chuyển thành datetime object

        df = pd.read_csv(os.path.join(folder_path, file), sep=";")
        
        # Thêm cột 'Date' chuẩn hóa theo yyyy-mm-dd
        df['Date'] = date_obj.strftime('%Y-%m-%d')
        
        all_data.append(df)
    else:
        print(f"⚠️ Không tìm thấy ngày trong tên file: {file}")

# 4. Gộp tất cả thành một DataFrame
merged_df = pd.concat(all_data, ignore_index=True)

# ✅ 5. Chỉ giữ lại các dòng có ASIN nằm trong danh sách
merged_df = merged_df[merged_df['ASIN'].isin(asin_list)]

# 6. Ở cột Ads, Sales và Net profit, thay thế dấu phẩy bằng dấu chấm
merged_df['Ads'] = merged_df['Ads'].str.replace(',', '.').astype(float)
merged_df['Sales'] = merged_df['Sales'].str.replace(',', '.').astype(float)
merged_df['Net profit'] = merged_df['Net profit'].str.replace(',', '.').astype(float)

In [9]:
merged_df.head(7)

Unnamed: 0,Product,ASIN,SKU,Units,Refunds,Sales,Promo,Ads,Sponsored products (PPC),Sponsored Display,...,Net profit,Estimated payout,Expenses,Margin,ROI,BSR,Real ACOS,Sessions,Unit Session Percentage,Date
13,NewEleven Coworker Leaving Gifts - Farewell Gi...,B092HNBJT6,W7-BHYY-1NZC,12.0,,239.76,,-56.03,-5603,,...,38.85,9441,,1620,6992,19 612,2337,,,2025-07-31
14,NewEleven Coworker Leaving Gifts - Gifts For C...,B0BRB6NJCT,MF-GYAU-GTQX,12.0,,179.76,,-26.45,-2645,,...,40.15,6919,,2234,13826,14 306,1471,,,2025-07-31
20,NewEleven Gifts For New Mom 2025 – Pregnancy G...,B0DCJM56W3,SG-SV6L-3ILW,10.0,,129.9,,-24.7,-247,,...,11.45,3375,,881,5135,9 363,1901,,,2025-07-31
28,NewEleven Retirement Gifts For Men Women 2025 ...,B09XXP5V2W,FL-GKXG-L8RH,7.0,,139.86,,-33.36,-3336,,...,24.02,544,,1717,7907,24 869,2385,,,2025-07-31
30,NewEleven Coworker Leaving Gifts - Farewell Gi...,B09ZYCD6GX,1Q-FSNP-VBED,6.0,,119.88,,-16.7,-167,,...,33.21,5853,,2770,13116,19 612,1393,,,2025-07-31
33,NewEleven 60th Birthday Gifts For Men Women - ...,B0DH87FYNN,UM-NFPU-HUFM,6.0,2.0,119.88,,-27.97,-2797,,...,-3.83,1603,,-319,-1928,2 235,2333,,,2025-07-31
35,NewEleven Engagement Gifts for Couples 2025 - ...,B0DRT3MK34,87-ARXM-YV1I,6.0,,59.94,,-22.39,-2239,,...,1.45,1225,,242,1343,95 575,3735,,,2025-07-31


In [10]:
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 585 entries, 13 to 6531
Data columns (total 31 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Product                   585 non-null    object 
 1   ASIN                      585 non-null    object 
 2   SKU                       578 non-null    object 
 3   Units                     493 non-null    float64
 4   Refunds                   61 non-null     float64
 5   Sales                     492 non-null    float64
 6   Promo                     45 non-null     object 
 7   Ads                       355 non-null    float64
 8   Sponsored products (PPC)  355 non-null    object 
 9   Sponsored Display         0 non-null      object 
 10  Sponsored brands (HSA)    0 non-null      object 
 11  Sponsored Brands Video    0 non-null      object 
 12  Google ads                0 non-null      float64
 13  Facebook ads              0 non-null      float64
 14  % Refunds    

In [11]:
scopes = ["https://www.googleapis.com/auth/spreadsheets", 
          "https://www.googleapis.com/auth/drive"]
creds = Credentials.from_service_account_file("/Users/thuytrinh/Downloads/new_credential.json", scopes=scopes)
client = gspread.authorize(creds)

# Mở Google Sheet
sheet_id = "1n2Ug3joJwsuuSsrx6T92Zi_dihht7KzIebRTGpKnehc"

# Mở file Google Sheet (Spreadsheet object)
spreadsheet = client.open_by_key(sheet_id)
sheet1 = client.open_by_key(sheet_id).worksheet("DATA_SB_TOTAL")

sheet1.clear()  # Xóa dữ liệu cũ trước khi ghi mới
set_with_dataframe(sheet1, merged_df)