In [2]:
import datetime
import numpy as np
import pandas as pd
import scipy.io

# Load the MATLAB file and unwrap the struct stored under the 'wiki' key;
# loadmat wraps the struct in a 2-D array, hence the [0, 0] lookup.
mat_path = "wiki.mat"
mat_data = scipy.io.loadmat(mat_path)
wiki_struct = mat_data['wiki'][0, 0]

# Take row 0 of every metadata field used below.
# NOTE(review): presumably each field is a 1 x N array with one entry per
# image -- confirm against the dataset description.
dob, photo_taken, full_path, face_score, second_score = (
    wiki_struct[field_name][0]
    for field_name in (
        'dob',
        'photo_taken',
        'full_path',
        'face_score',
        'second_face_score',
    )
)

def matlab_datenum_to_date(matlab_dn):
    """Convert a MATLAB serial date number to a ``datetime.date``.

    Args:
        matlab_dn: MATLAB datenum; any fractional (time-of-day) part is
            truncated by ``int()``.

    Returns:
        The calendar date corresponding to the whole-day part of the datenum.

    Raises:
        ValueError: If the shifted value is not a valid Python ordinal
            (i.e. it is smaller than 1).
    """
    # MATLAB datenums run 366 days ahead of Python's proleptic Gregorian
    # ordinals (e.g. datenum 730486 is 2000-01-01, ordinal 730120).
    python_ordinal = int(matlab_dn) - 366
    return datetime.date.fromordinal(python_ordinal)

def compute_wiki_age(photo_year, birth_datenum):
    """Estimate a person's age, in years, at the time a photo was taken.

    Only the year of the photo is given, so July 1 of that year is used as
    the photo date.

    Args:
        photo_year: Calendar year in which the photo was taken.
        birth_datenum: Date of birth as a MATLAB serial date number.

    Returns:
        The number of days between the birth date and the assumed photo
        date, divided by 365.2425 (float). Negative if the birth date falls
        after the assumed photo date.
    """
    mid_year = datetime.date(photo_year, 7, 1)
    days_alive = (mid_year - matlab_datenum_to_date(birth_datenum)).days
    return days_alive / 365.2425

# Build one record per usable image. An entry is kept only when its face
# score is not infinite and its second-face score is NaN.
# NOTE(review): presumably an infinite face score means "no face detected"
# and a NaN second score means "no second face" -- confirm against the
# dataset documentation.
records = [
    {
        "image_name": path_cell[0],
        "approx_age": compute_wiki_age(year_taken, birth_dn),
    }
    for birth_dn, year_taken, path_cell, primary_score, secondary_score in zip(
        dob, photo_taken, full_path, face_score, second_score
    )
    if not np.isinf(primary_score) and np.isnan(secondary_score)
]

# Fix the column order explicitly so the frame has both columns even when
# no record survives the filter.
df = pd.DataFrame(records, columns=["image_name", "approx_age"])
print(df.head(10))
print("Total valid faces:", len(df))


                        image_name  approx_age
0  17/10000217_1981-05-05_2009.jpg   28.156636
1    12/100012_1948-07-03_2008.jpg   59.995756
2  16/10002116_1971-05-31_2012.jpg   41.087770
3  02/10002702_1960-11-09_2012.jpg   51.642402
4  41/10003541_1937-09-27_1971.jpg   33.758393
5  99/10004299_1908-08-19_1950.jpg   41.865336
6   56/1000456_1933-06-12_1969.jpg   36.052759
7  82/10004882_1987-05-16_2010.jpg   23.127100
8  47/10005947_1981-04-07_2006.jpg   25.232551
9   84/1000684_1972-04-05_2004.jpg   32.238855
Total valid faces: 40216
