# 1. Samsung

In [15]:
import glob
import pandas as pd
from datetime import datetime
import os
from dateutil import parser
import numpy as np
import matplotlib.pyplot as plt

In [16]:
def excel_to_dataframe(excel_path):
    df1 = pd.read_excel(excel_path, sheet_name = 'Monthly', usecols = 'A', names=['device'],header=None)
    df1['support_type'] = 'Monthly'
    df1['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')

    df2 = pd.read_excel(excel_path, sheet_name = 'Quarterly', usecols = 'A', names=['device'],header=None)
    df2['support_type'] = 'Quarterly'
    df2['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')

    try:
        df3 = pd.read_excel(excel_path, sheet_name = 'Other', usecols = 'A', names=['device'],header=None)
        df3['support_type'] = 'Biannually'
        df3['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')
    except ValueError:
        try: 
            df3 = pd.read_excel(excel_path, sheet_name = 'Biannual', usecols = 'A', names=['device'],header=None)
            df3['support_type'] = 'Biannually'
            df3['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')
        except ValueError:
            df3 = pd.DataFrame([])
        
    try:
        df4 = pd.read_excel(excel_path, sheet_name = 'Wearable', usecols = 'A', names=['device'],header=None)
        df4['support_type'] = 'Wearable'
        df4['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')
    except ValueError:
        df4 = pd.DataFrame([])

    df = pd.concat([df1,df2,df3,df4])
    return df


def get_support_timeline(device,dataset):

    df = dataset
    
    #print('Getting support timeline for {}!'.format(device))
    
    df_device = df[df.device == device].sort_values(by='date')
    
    try:
        monthly_start = df_device[df_device.support_type == 'Monthly'].iloc[0]['date']
        monthly_end = df_device[df_device.support_type == 'Monthly'].iloc[-1]['date']
    except IndexError:
        monthly_start = None
        monthly_end = None
        
    try: 
        quarterly_start = df_device[df_device.support_type == 'Quarterly'].iloc[0]['date']
        quarterly_end = df_device[df_device.support_type == 'Quarterly'].iloc[-1]['date']
    except IndexError:
        quarterly_start = None
        quarterly_end = None
        
    try:
        biannually_start = df_device[df_device.support_type == 'Biannually'].iloc[0]['date']
        biannually_end = df_device[df_device.support_type == 'Biannually'].iloc[-1]['date']
    except IndexError:
        biannually_start = None
        biannually_end = None
    
    try:
        wearable_start = df_device[df_device.support_type == 'Wearable'].iloc[0]['date']
        wearable_end = df_device[df_device.support_type == 'Wearable'].iloc[-1]['date']
    except IndexError:
        wearable_start = None
        wearable_end = None
    result = [monthly_start,monthly_end,quarterly_start,quarterly_end,biannually_start,biannually_end,wearable_start,wearable_end]
    
    return result

In [17]:
dir_path = '../Data/Samsung/samsung-support-lists/'

print('Getting lists from {}'.format(dir_path))

excel_files = glob.glob(dir_path + '*.xlsx')
df_final = pd.DataFrame([])
for file in excel_files[:]:
    df_new = excel_to_dataframe(file)
    df_final = pd.concat([df_final,df_new],ignore_index=True)

df_final.to_csv('../Data/Samsung/support_lists.csv',index=False)

print('Converting excel files to DataFrame is done!')

df_timelines = pd.DataFrame()
for device in df_final['device'].unique():
    
    support_timeline = get_support_timeline(device,df_final)
    
    df_new = pd.DataFrame({"device": device, 
                           "monthly_start": support_timeline[0],
                           "monthly_end": support_timeline[1],
                           "quarterly_start": support_timeline[2],
                           "quarterly_end": support_timeline[3],
                           "biannually_start": support_timeline[4],
                           "biannually_end": support_timeline[5],
                           "wearable_start": support_timeline[6],
                           "wearable_end": support_timeline[7]},
                          index=[0])
 
    
    df_timelines = pd.concat([df_timelines,df_new],ignore_index=True)

df_timelines.to_csv('../Data/Samsung/timelines_samsung_devices_v1.csv',index=False)

df_timelines.head()

Getting lists from ../Data/Samsung/samsung-support-lists/
Converting excel files to DataFrame is done!


Unnamed: 0,device,monthly_start,monthly_end,quarterly_start,quarterly_end,biannually_start,biannually_end,wearable_start,wearable_end
0,Galaxy Fold,2019-10-08 22:18:41,2022-11-04 17:25:38,2022-11-08 08:46:24,2023-04-04 00:30:35,NaT,NaT,NaT,NaT
1,Galaxy Fold 5G,2020-12-10 23:59:35,2022-11-04 17:25:38,2022-11-08 08:46:24,2023-04-04 00:30:35,NaT,NaT,NaT,NaT
2,Galaxy Z Fold2,2020-09-14 17:19:27,2023-04-04 00:30:35,,,NaT,NaT,NaT,NaT
3,Galaxy Z Fold2 5G,2020-09-14 17:19:27,2023-04-04 00:30:35,,,NaT,NaT,NaT,NaT
4,Galaxy Z Flip,2020-03-03 11:31:08,2023-04-04 00:30:35,,,NaT,NaT,NaT,NaT


In [18]:
print('First snapshot: {}'.format(df_final['date'].min()))
print('Last snapshot: {}'.format(df_final['date'].max()))
print('All snapshots: {}'.format(len(df_final['date'].unique())))
print('Last snapshot: {}'.format(df_final['date'].max()))
print('Avg snapshot: {}'.format((df_final['date'].max()-df_final['date'].min()).days/len(df_final['date'].unique())))
print('Unique Devices: {}'.format(len(df_timelines.index)))

First snapshot: 2017-10-03 06:11:44
Last snapshot: 2023-04-04 00:30:35
All snapshots: 507
Last snapshot: 2023-04-04 00:30:35
Avg snapshot: 3.9605522682445757
Unique Devices: 258


In [19]:
len(df_final['device'].unique())

258

In [20]:
df = pd.read_csv('../Data/Samsung/timelines_samsung_devices_v1.csv')

date_first_list = '2017-10-03 06:11:44' # remove the first because maybe the device was already receving before
date_last_list = '2023-04-04 00:30:35' # remove the last because maybe the device will continue to receive 
date_nat = pd.Timestamp('NaT').to_pydatetime()

for column in df.columns:
    df.at[(df[column] == date_first_list) | (df[column] == date_last_list), column] = date_nat
    
df.to_csv('../Data/Samsung/timelines_samsung_devices_v2.csv',index=False)

df.head()


Unnamed: 0,device,monthly_start,monthly_end,quarterly_start,quarterly_end,biannually_start,biannually_end,wearable_start,wearable_end
0,Galaxy Fold,2019-10-08 22:18:41,2022-11-04 17:25:38,2022-11-08 08:46:24,NaT,,,,
1,Galaxy Fold 5G,2020-12-10 23:59:35,2022-11-04 17:25:38,2022-11-08 08:46:24,NaT,,,,
2,Galaxy Z Fold2,2020-09-14 17:19:27,NaT,,,,,,
3,Galaxy Z Fold2 5G,2020-09-14 17:19:27,NaT,,,,,,
4,Galaxy Z Flip,2020-03-03 11:31:08,NaT,,,,,,


In [21]:
df_timelines = pd.read_csv('../Data/Samsung/timelines_samsung_devices_v2.csv')
    
df_timelines[['monthly_start','monthly_end','quarterly_start','quarterly_end','biannually_start','biannually_end']] = df_timelines[['monthly_start','monthly_end','quarterly_start','quarterly_end','biannually_start','biannually_end']].apply(pd.to_datetime)
df_timelines['monthly_duration'] = df_timelines['monthly_end']-df_timelines['monthly_start']
df_timelines['quarterly_duration'] = df_timelines['quarterly_end']-df_timelines['quarterly_start']
df_timelines['biannually_duration'] = df_timelines['biannually_end']-df_timelines['biannually_start']

df_timelines.to_csv('../Data/Samsung/timelines_samsung_devices_v3.csv',index=False)

cond1 = ~(df_timelines['monthly_duration'].dt.days==0) #filter out if only one time appeared in the list
cond2 = ~df_timelines['monthly_duration'].isnull() #filter out as no monthly duration can be calculated

df_timelines_monthly = df_timelines[cond1&cond2].reset_index(drop=True)

cond3 = ~(df_timelines['quarterly_duration'].dt.days==0) #filter out if only one time appeared in the list
cond4 = ~df_timelines['quarterly_duration'].isnull() #filter out as no monthly duration can be calculated

df_timelines_quarterly = df_timelines[cond3&cond4].reset_index(drop=True)


cond5 = ~(df_timelines['biannually_duration'].dt.days==0) #filter out if only one time appeared in the list
cond6 = ~df_timelines['biannually_duration'].isnull() #filter out as no monthly duration can be calculated

df_timelines_biannual = df_timelines[cond5&cond6].reset_index(drop=True)


print("Avg monthly support duration: {}".format(df_timelines_monthly['monthly_duration'].mean(skipna=True)))
print('Monthly completed periods for: {} devices'.format(len(df_timelines_monthly['device'].unique())))

print("Avg quarterly support duration: {}".format(df_timelines_quarterly['quarterly_duration'].mean(skipna=True)))
print('Quarterly completed periods for: {} devices'.format(len(df_timelines_quarterly['device'].unique())))

print("Avg binannually support duration: {}".format(df_timelines_biannual['biannually_duration'].mean(skipna=True)))
print('Biannually completed periods for: {} devices'.format(len(df_timelines_biannual['device'].unique())))



Avg monthly support duration: 1044 days 03:57:27.250000
Monthly completed periods for: 20 devices
Avg quarterly support duration: 573 days 07:37:50.900000008
Quarterly completed periods for: 110 devices
Avg binannually support duration: 580 days 05:38:34.420000
Biannually completed periods for: 50 devices


In [27]:
df_timelines_tablets = df_timelines[df_timelines['device'].str.contains('Tab')]

df_timelines_tablets

Unnamed: 0,device,monthly_start,monthly_end,quarterly_start,quarterly_end,biannually_start,biannually_end,wearable_start,wearable_end,monthly_duration,quarterly_duration,biannually_duration
87,Galaxy Tab A 8 (2019),NaT,NaT,2019-10-08 22:18:41,2021-08-02 17:23:45,2021-08-06 07:29:59,NaT,,,NaT,663 days 19:05:04,NaT
88,Galaxy Tab A 8.4 (2020),NaT,NaT,2020-04-08 23:58:41,2022-04-03 17:02:57,2022-04-05 01:36:26,NaT,,,NaT,724 days 17:04:16,NaT
89,Galaxy Tab A7,NaT,NaT,2020-09-14 17:19:27,2022-09-25 04:28:13,2022-10-15 22:30:50,NaT,,,NaT,740 days 11:08:46,NaT
90,Galaxy Tab A7 Lite,NaT,NaT,2021-06-08 17:09:04,NaT,NaT,NaT,,,NaT,NaT,NaT
91,Galaxy Tab Active2,NaT,NaT,2018-03-08 21:26:30,2021-11-27 20:58:55,NaT,NaT,,,NaT,1359 days 23:32:25,NaT
92,Galaxy Tab Active Pro,NaT,NaT,2019-10-08 22:18:41,NaT,NaT,NaT,,,NaT,NaT,NaT
93,Galaxy Tab Active3,NaT,NaT,2020-10-12 20:04:56,NaT,NaT,NaT,,,NaT,NaT,NaT
94,Galaxy Tab S6,NaT,NaT,2019-10-01 12:43:14,2021-10-04 23:40:50,2021-10-05 16:13:36,NaT,,,NaT,734 days 10:57:36,NaT
95,Galaxy Tab S6 5G,NaT,NaT,2020-03-03 11:31:08,2022-02-08 00:01:56,2022-02-08 15:41:26,NaT,,,NaT,706 days 12:30:48,NaT
96,Galaxy Tab S6 Lite,NaT,NaT,2020-05-25 22:54:03,NaT,NaT,NaT,,,NaT,NaT,NaT


# 2. Xiaomi

In [13]:
import glob
import pandas as pd
from datetime import datetime
import os

def excel_to_dataframe(excel_path):
    df1 = pd.read_excel(excel_path, sheet_name = 'Monthly', usecols = 'A', names=['phone_model'],header=None)
    df1['support'] = 'Monthly'
    df1['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')

    df2 = pd.read_excel(excel_path, sheet_name = 'Quarterly', usecols = 'A', names=['phone_model'],header=None)
    df2['support'] = 'Quarterly'
    df2['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')

    try:
        df3 = pd.read_excel(excel_path, sheet_name = 'Other', usecols = 'A', names=['phone_model'],header=None)
        df3['support'] = 'Biannually'
        df3['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')
    except ValueError:
        try: 
            df3 = pd.read_excel(excel_path, sheet_name = 'Biannual', usecols = 'A', names=['phone_model'],header=None)
            df3['support'] = 'Biannually'
            df3['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')
        except ValueError:
            df3 = pd.DataFrame([])
            
        
        
    try:
        df4 = pd.read_excel(excel_path, sheet_name = 'Wearable', usecols = 'A', names=['phone_model'],header=None)
        df4['support'] = 'Wearable'
        df4['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')
    except ValueError:
        df4 = pd.DataFrame([])

    df = pd.concat([df1,df2,df3,df4])
    return df


def get_support_timeline(phone,dataset):

    df = dataset
    
    #print('Getting support timeline for {}!'.format(phone))
    
    df_phone = df[df.phone_model == phone].sort_values(by='date')
    
    try:
        monthly_start = df_phone[df_phone.support == 'Monthly'].iloc[0]['date']
        monthly_end = df_phone[df_phone.support == 'Monthly'].iloc[-1]['date']
    except IndexError:
        monthly_start = None
        monthly_end = None
        
    try: 
        quarterly_start = df_phone[df_phone.support == 'Quarterly'].iloc[0]['date']
        quarterly_end = df_phone[df_phone.support == 'Quarterly'].iloc[-1]['date']
    except IndexError:
        quarterly_start = None
        quarterly_end = None
        
    try:
        biannually_start = df_phone[df_phone.support == 'Biannually'].iloc[0]['date']
        biannually_end = df_phone[df_phone.support == 'Biannually'].iloc[-1]['date']
    except IndexError:
        biannually_start = None
        biannually_end = None
    
    try:
        wearable_start = df_phone[df_phone.support == 'Wearable'].iloc[0]['date']
        wearable_end = df_phone[df_phone.support == 'Wearable'].iloc[-1]['date']
    except IndexError:
        wearable_start = None
        wearable_end = None
    result = [monthly_start,monthly_end,quarterly_start,quarterly_end,biannually_start,biannually_end,wearable_start,wearable_end]
    
    return result

dir_path = '../Data/Xiaomi/Xiaomi-device-support-lists/'

print('Getting lists from {}'.format(dir_path))

excel_files = glob.glob(dir_path + '*.xlsx')
df_final_xiaomi = pd.DataFrame([])
for file in excel_files[:]:
    df_new = excel_to_dataframe(file)
    df_final_xiaomi = pd.concat([df_final_xiaomi,df_new],ignore_index=True)

print('Converting excel files to DataFrame is done!')

df_timelines_xiaomi = pd.DataFrame([])
for phone in df_final_xiaomi['phone_model'].unique():
    
    support_timeline = get_support_timeline(phone,df_final_xiaomi)
    
    df_new = pd.DataFrame({"phone": phone, 
                           "monthly_start": support_timeline[0],
                           "monthly_end": support_timeline[1],
                           "quarterly_start": support_timeline[2],
                           "quarterly_end": support_timeline[3],
                           "biannually_start": support_timeline[4],
                           "biannually_end": support_timeline[5],
                           "wearable_start": support_timeline[6],
                           "wearable_end": support_timeline[7]},
                          index=[0])
 
    
    df_timelines_xiaomi = pd.concat([df_timelines_xiaomi,df_new],ignore_index=True)
    
df_timelines_xiaomi.to_csv('../Data/Xiaomi/timelines_Xiaomi.csv',index=False)

df_timelines_xiaomi.head()


Getting lists from ../Data/Xiaomi/Xiaomi-device-support-lists/
Converting excel files to DataFrame is done!


Unnamed: 0,phone,monthly_start,monthly_end,quarterly_start,quarterly_end,biannually_start,biannually_end,wearable_start,wearable_end
0,Mi A3,2021-01-27 17:45:16,2022-10-31 17:52:22,2021-01-27 17:45:16,2022-10-31 17:52:22,,,,
1,Mi A2,2021-01-27 17:45:16,2022-10-31 17:52:22,,,,,,
2,Mi A2 Lite,2021-01-27 17:45:16,2022-10-31 17:52:22,,,,,,
3,Mi 10T Pro,,,2021-01-27 17:45:16,2022-10-31 17:52:22,,,,
4,Mi 10 T,,,2021-01-27 17:45:16,2022-10-31 17:52:22,,,,


In [14]:
import os
import pandas as pd
from datetime import datetime
from datetime import date

# Folder path
folder_path = '../Data/Xiaomi/xiaomi_archieve_data_eos/'

# Initialize empty DataFrame
df_xiaomi_eos = pd.DataFrame(columns=['Date', 'Brand', 'Source URL'])

# Go through each file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.xlsx'):
        # Get timestamp from the filename
        timestamp = filename.split('.')[0]
        date = datetime.strptime(timestamp, '%Y%m%d%H%M%S').date() 

        # Read excel file
        excel_data = pd.read_excel(os.path.join(folder_path, filename))

        # Check if dataframe is empty
        if not excel_data.empty:
            # Source URL is the first element of 'Source URL' column
            source_url = excel_data['Source URL'].iloc[0]
            
            my_list = list(set(excel_data['Mi'].to_list()+excel_data['Redmi'].to_list()+excel_data['POCO'].to_list()))

            cleanedList = [x for x in my_list if str(x) != 'nan']

            # Create a temporary DataFrame from cleanedList and append it to df
            temp_df = pd.DataFrame({
                'Date': [date]*len(cleanedList), 
                'Brand': cleanedList, 
                'Source URL': [source_url]*len(cleanedList)
            })
            df_xiaomi_eos = df_xiaomi_eos.append(temp_df, ignore_index=True)


df_xiaomi_eos = df_xiaomi_eos.sort_values('Date')
df_xiaomi_eos.head()

Unnamed: 0,Date,Brand,Source URL
2741,2021-10-12,Redmi Y2,https://web.archive.org/web/20211012105859/htt...
2779,2021-10-12,MI Note 2,https://web.archive.org/web/20211012105859/htt...
2780,2021-10-12,MI 6,https://web.archive.org/web/20211012105859/htt...
2781,2021-10-12,MI 5X,https://web.archive.org/web/20211012105859/htt...
2782,2021-10-12,Redmi 4X,https://web.archive.org/web/20211012105859/htt...


In [66]:
appearance_df = pd.DataFrame({
    'First Appearance': df_xiaomi_eos.groupby('Brand')['Date'].min(),
    'Last Appearance': df_xiaomi_eos.groupby('Brand')['Date'].max()
})

# Sort appearance_df by 'First Appearance'
appearance_df = appearance_df.sort_values('First Appearance')

appearance_df

Unnamed: 0_level_0,First Appearance,Last Appearance
Brand,Unnamed: 1_level_1,Unnamed: 2_level_1
MI 1,2021-10-12,2022-08-24
MIX 2,2021-10-12,2023-03-06
MIX 2S,2021-10-12,2023-03-06
MIX 3,2021-10-12,2023-03-06
Redmi 1,2021-10-12,2023-03-06
...,...,...
Redmi K30 5G Speed,2023-02-04,2023-03-06
Redmi 8A Dual,2023-02-04,2023-03-06
Redmi Note 5 Pro,2023-02-04,2023-03-06
Redmi Note 8 (IN),2023-02-04,2023-03-06


# 3. Oppo

In [4]:
import glob
import pandas as pd
from datetime import datetime
import os

def excel_to_dataframe(excel_path):
    df1 = pd.read_excel(excel_path, sheet_name = 'monthly', usecols = 'A', names=['phone_model'],header=None)
    df1['support'] = 'monthly'
    df1['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')

    df2 = pd.read_excel(excel_path, sheet_name = 'quarterly', usecols = 'A', names=['phone_model'],header=None)
    df2['support'] = 'quarterly'
    df2['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')
    
    df3 = pd.read_excel(excel_path, sheet_name = 'others', usecols = 'A', names=['phone_model'],header=None)
    df3['support'] = 'others'
    df3['date'] = datetime.strptime(os.path.splitext(os.path.basename(excel_path))[0],'%Y%m%d%H%M%S')

    df = pd.concat([df1,df2,df3])
    return df


def get_support_timeline(phone,dataset):

    df = dataset
    
    #print('Getting support timeline for {}!'.format(phone))
    
    df_phone = df[df.phone_model == phone].sort_values(by='date')
    
    try:
        monthly_start = df_phone[df_phone.support == 'monthly'].iloc[0]['date']
        monthly_end = df_phone[df_phone.support == 'monthly'].iloc[-1]['date']
    except IndexError:
        monthly_start = None
        monthly_end = None
        
    try: 
        quarterly_start = df_phone[df_phone.support == 'quarterly'].iloc[0]['date']
        quarterly_end = df_phone[df_phone.support == 'quarterly'].iloc[-1]['date']
    except IndexError:
        quarterly_start = None
        quarterly_end = None
        
    try: 
        others_start = df_phone[df_phone.support == 'others'].iloc[0]['date']
        others_end = df_phone[df_phone.support == 'others'].iloc[-1]['date']
    except IndexError:
        others_start = None
        others_end = None
        

    result = [monthly_start,monthly_end,quarterly_start,quarterly_end,others_start,others_end]
    
    return result

dir_path = '../Data/Oppo/oppo-support-lists/'

print('Getting lists from {}'.format(dir_path))

excel_files = glob.glob(dir_path + '*.xlsx')
df_final_oppo = pd.DataFrame([])
for file in excel_files[:]:
    df_new = excel_to_dataframe(file)
    df_final_oppo = pd.concat([df_final_oppo,df_new],ignore_index=True)

print('Converting excel files to DataFrame is done!')

df_timelines_oppo = pd.DataFrame([])
for phone in df_final_oppo['phone_model'].unique():
    
    support_timeline = get_support_timeline(phone,df_final_oppo)
    
    df_new = pd.DataFrame({"phone": phone, 
                           "monthly_start": support_timeline[0],
                           "monthly_end": support_timeline[1],
                           "quarterly_start": support_timeline[2],
                           "quarterly_end": support_timeline[3],
                           "others_start": support_timeline[4],
                           "others_end": support_timeline[5]},
                          index=[0])
 
    
    df_timelines_oppo = pd.concat([df_timelines_oppo,df_new],ignore_index=True)
    

# split the column values into two columns
df_timelines_oppo[['model_name', 'model_number']] = df_timelines_oppo['phone'].str.split('（', expand=True)
df_timelines_oppo['model_number'] = df_timelines_oppo['model_number'].str.replace('）', '')
df_timelines_oppo['model_name_v2'] = df_timelines_oppo['model_name'].str.split('(', expand=True)[0]

df_timelines_oppo['monthly_duration'] = (df_timelines_oppo['monthly_end']-df_timelines_oppo['monthly_start']).dt.days
df_timelines_oppo['quarterly_duration'] = (df_timelines_oppo['quarterly_end']-df_timelines_oppo['quarterly_start']).dt.days

df_timelines_oppo.to_csv('../Data/Oppo/timelines_Oppo.csv',index=False,encoding="utf-8")

df_timelines_oppo


Getting lists from ../Data/Oppo/oppo-support-lists/
Converting excel files to DataFrame is done!


Unnamed: 0,phone,monthly_start,monthly_end,quarterly_start,quarterly_end,others_start,others_end,model_name,model_number,model_name_v2,monthly_duration,quarterly_duration
0,Find X2（CPH2023、CPH1921）,2022-10-11 18:51:30,2023-03-07 19:24:15,,,NaT,NaT,Find X2,CPH2023、CPH1921,Find X2,147.0,
1,Find X2 Pro（CPH2025）,2022-10-11 18:51:30,2023-03-07 19:24:15,,,NaT,NaT,Find X2 Pro,CPH2025,Find X2 Pro,147.0,
2,Find X3（PEDM00）,2022-10-11 18:51:30,2023-03-07 19:24:15,,,2021-01-21 17:16:59,2022-06-09 07:23:31,Find X3,PEDM00,Find X3,147.0,
3,Find X3 Neo（CPH2207）,2022-10-11 18:51:30,2023-03-07 19:24:15,,,2021-01-21 17:16:59,2022-06-09 07:23:31,Find X3 Neo,CPH2207,Find X3 Neo,147.0,
4,Find X3 Pro（PEEM00、CPH2173、CPH2305）,2022-10-11 18:51:30,2023-03-07 19:24:15,,,,,Find X3 Pro,PEEM00、CPH2173、CPH2305,Find X3 Pro,147.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
323,Reno5 Pro 5G（PDSM00）,,,,,2021-01-21 17:16:59,2022-06-09 07:23:31,Reno5 Pro 5G,PDSM00,Reno5 Pro 5G,,
324,Reno6 pro（CPH2247、CPH2249、PEPM00）,,,,,2021-01-21 17:16:59,2022-06-09 07:23:31,Reno6 pro,CPH2247、CPH2249、PEPM00,Reno6 pro,,
325,Reno7（PFJM10）,,,,,2021-01-21 17:16:59,2022-06-09 07:23:31,Reno7,PFJM10,Reno7,,
326,Reno7 pro（PFDM00）,,,,,2021-01-21 17:16:59,2022-06-09 07:23:31,Reno7 pro,PFDM00,Reno7 pro,,


In [12]:
df_timelines_oppo['phone']

Unnamed: 0,0,1
0,Find X2,CPH2023、CPH1921）
1,Find X2 Pro,CPH2025）
2,Find X3,PEDM00）
3,Find X3 Neo,CPH2207）
4,Find X3 Pro,PEEM00、CPH2173、CPH2305）
...,...,...
323,Reno5 Pro 5G,PDSM00）
324,Reno6 pro,CPH2247、CPH2249、PEPM00）
325,Reno7,PFJM10）
326,Reno7 pro,PFDM00）


In [11]:
df_timelines[df_timelines['phone'].str.contains('Reno4')]

Unnamed: 0,phone,monthly_start,monthly_end,quarterly_start,quarterly_end,others_start,others_end,model_name,model_number,model_name_v2
27,Reno4（CPH2113、CPH2109）,,,2022-10-11 18:51:30,2023-03-07 19:24:15,,,Reno4,CPH2113、CPH2109,Reno4
28,Reno4 5G（PDPM00、PDPT00、CPH2091）,,,2022-10-11 18:51:30,2023-03-07 19:24:15,,,Reno4 5G,PDPM00、PDPT00、CPH2091,Reno4 5G
29,Reno4 F（CPH2209）,,,2021-01-21 17:16:59,2023-03-07 19:24:15,,,Reno4 F,CPH2209,Reno4 F
30,Reno4 Lite（CPH2125）,,,2022-10-11 18:51:30,2023-03-07 19:24:15,,,Reno4 Lite,CPH2125,Reno4 Lite
31,Reno4 Pro（PDNT00、PDNM00、CPH2109、CPH2113）,,,2022-10-11 18:51:30,2023-03-07 19:24:15,,,Reno4 Pro,PDNT00、PDNM00、CPH2109、CPH2113,Reno4 Pro
32,Reno4 Pro 5G（CPH2089）,,,2022-10-11 18:51:30,2023-03-07 19:24:15,,,Reno4 Pro 5G,CPH2089,Reno4 Pro 5G
33,Reno4 SE（PEAT00、PEAM00）,,,2022-10-11 18:51:30,2023-03-07 19:24:15,,,Reno4 SE,PEAT00、PEAM00,Reno4 SE
34,Reno4 Z 5G（CPH2065）,,,2022-10-11 18:51:30,2023-03-07 19:24:15,,,Reno4 Z 5G,CPH2065,Reno4 Z 5G
154,Reno4(CPH2113、CPH2109),,,NaT,NaT,2022-10-11 18:51:30,2023-03-07 19:24:15,Reno4(CPH2113、CPH2109),,Reno4
155,Reno4 5G(CPH2091),,,NaT,NaT,2022-10-11 18:51:30,2023-03-07 19:24:15,Reno4 5G(CPH2091),,Reno4 5G


# 4. Google

In [8]:
df_timeline_google = pd.read_excel('../Data/Google/Google-support-list.xlsx')

df_timeline_google.head()

Unnamed: 0,Phone,Guaranteed Android version updates until at least:,Guaranteed security updates until at least:,Unnamed: 3,Unnamed: 4
0,Pixel 7 & Pixel 7 Pro,2025-10-01,2027-10-01,,
1,Pixel 6 & Pixel 6 Pro,2024-10-01,2026-10-01,,*Nexus devices get security updates for at lea...
2,Pixel 6a,2025-07-01,2027-07-01,,
3,Pixel 5a with 5G,2024-08-01,2024-08-01,,
4,Pixel 5,2023-10-01,2023-10-01,,*Pixel phones get security updates for at leas...


In [40]:
import os
import pandas as pd
from datetime import datetime

# Folder path
folder_path = '../Data/Xiaomi/xiaomi_archieve_data_eos/'

# Initialize empty DataFrame
df_xiaomi_eos = pd.DataFrame(columns=['Date', 'Brand', 'Source URL'])

# Go through each file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.xlsx'):
        # Get timestamp from the filename
        timestamp = filename.split('.')[0]
        date = datetime.strptime(timestamp, '%Y%m%d%H%M%S').date() 

        # Read excel file
        excel_data = pd.read_excel(os.path.join(folder_path, filename))

        # Check if dataframe is empty
        if not excel_data.empty:
            # Source URL is the first element of 'Source URL' column
            source_url = excel_data['Source URL'].iloc[0]
            
            my_list = list(set(excel_data['Mi'].to_list()+excel_data['Redmi'].to_list()+excel_data['POCO'].to_list()))

            cleanedList = [x for x in my_list if str(x) != 'nan']

            # Create a temporary DataFrame from cleanedList and append it to df
            temp_df = pd.DataFrame({
                'Date': [date]*len(cleanedList), 
                'Brand': cleanedList, 
                'Source URL': [source_url]*len(cleanedList)
            })
            df_xiaomi_eos = df_xiaomi_eos.append(temp_df, ignore_index=True)


df_xiaomi_eos = df_xiaomi_eos.sort_values('Date')
df_xiaomi_eos.head()

Unnamed: 0,Date,Brand,Source URL
2741,2021-10-12,MI Pad 4 Plus,https://web.archive.org/web/20211012105859/htt...
2779,2021-10-12,MI MAX 3,https://web.archive.org/web/20211012105859/htt...
2780,2021-10-12,MI 4c,https://web.archive.org/web/20211012105859/htt...
2781,2021-10-12,Redmi 1S,https://web.archive.org/web/20211012105859/htt...
2782,2021-10-12,Redmi 4X,https://web.archive.org/web/20211012105859/htt...
