***
# Car sales
***

### Déterminer les ventes annuelles de 2017 à 2021

<u>Méthodologie</u> : <br>  
- ventes = stock_t - stock_t-1

Note : possiblité de sous-estimer les ventes puisque certain véhicules peuvent sortir ou entrer le parc (accident, remisage, véhicules saisoniers) 
***

In [1]:
# Create folders and getting path
import os
cwd = os.getcwd()

# Loop to create folders
folder_names = ['Dataframe', 'Output', 'Data']

folders = {}
for folder_name in folder_names:
    folders[folder_name] = os.path.join(cwd, folder_name)

    if not os.path.exists(os.path.join(cwd, folder_name)):
        os.makedirs(os.path.join(cwd, folder_name))
        print(f'Le dossier « {folder_name} » a été créé')

    else:
        print(f'Le dossier « {folder_name} » est existant')


# Création des variables de path

# Déterminer si os est win ou linux pour définir les path
if os.name == 'nt':
    slash = '\\'
elif os.name == 'posix':
    slash = '/'

path_prog =     cwd + slash
path_data =     folders['Data'] + slash
path_df =       folders['Dataframe'] + slash
path_output =   folders['Output'] + slash

# Mettre \\ pour éviter les erreurs
path_dict = [path_prog, path_data, path_df, path_output]
for path in path_dict:
    path = path.replace('\\','\\\\')


# Détermination de l'année
print()
print(path_prog)
print(path_data)
print(path_df)
print(path_output)

Le dossier « Dataframe » est existant
Le dossier « Output » est existant
Le dossier « Data » est existant

c:\Users\Charles_tour\Documents\GitHub\car_sales_forcast\
c:\Users\Charles_tour\Documents\GitHub\car_sales_forcast\Data\
c:\Users\Charles_tour\Documents\GitHub\car_sales_forcast\Dataframe\
c:\Users\Charles_tour\Documents\GitHub\car_sales_forcast\Output\


***

In [3]:
import pandas as pd
pd.set_option('display.max_columns', 1000)

saaq2016 = pd.read_csv(path_data + 'vehicules-circulation-2016.csv')
saaq2017 = pd.read_csv(path_data + 'vehicule-en-circulation-2017.csv')
saaq2018 = pd.read_csv(path_data + 'vehicule-en-circulation-2018.csv')

In [34]:
# Keep light duty vehicles
saaq_df = {
    # 2016 : saaq2016, 
    2017 : saaq2017,
    2018 : saaq2018
}


for year, df in saaq_df.items():
    print('-'*25)
    print(f'SAAQ_{year}')
    print('-'*25)
    print(f'Shape initial : {df.shape}')
    print(f'Nombre d\'observation total SAAQ_{year} : {len(df)}')
    saaq_df[year] = df.loc[(df['CLAS'] == 'PAU') | (df['CLAS'] == 'CAU') | (df['CLAS'] == 'TTA')]
    print(f'Nombre d\'observation light duty SAAQ_{year}: {len(saaq_df[year])}')
    print(df.columns.tolist())

-------------------------
SAAQ_2017
-------------------------
Shape initial : (6552488, 19)
Nombre d'observation total SAAQ_2017 : 6552488
Nombre d'observation light duty SAAQ_2017: 5231017
['AN', 'NOSEQ_VEH', 'CLAS', 'TYP_VEH_CATEG_USA', 'MARQ_VEH', 'MODEL_VEH', 'ANNEE_MOD', 'MASSE_NETTE', 'NB_CYL', 'CYL_VEH', 'NB_ESIEU_MAX', 'COUL_ORIG', 'TYP_CARBU', 'TYP_DOSS_PERS', 'PHYS_SEX', 'PHYS_AGE', 'REG_ADM', 'MRC', 'CG_FIXE']
-------------------------
SAAQ_2018
-------------------------
Shape initial : (6608276, 19)
Nombre d'observation total SAAQ_2018 : 6608276
Nombre d'observation light duty SAAQ_2018: 5259496
['AN', 'NOSEQ_VEH', 'CLAS', 'TYP_VEH_CATEG_USA', 'MARQ_VEH', 'MODEL_VEH', 'ANNEE_MOD', 'MASSE_NETTE', 'NB_CYL', 'CYL_VEH', 'NB_ESIEU_MAX', 'COUL_ORIG', 'TYP_CARBU', 'TYP_DOSS_PERS', 'PHYS_SEX', 'PHYS_AGE', 'REG_ADM', 'MRC', 'CG_FIXE']


In [35]:
saaq_df[2017].head()

Unnamed: 0,AN,NOSEQ_VEH,CLAS,TYP_VEH_CATEG_USA,MARQ_VEH,MODEL_VEH,ANNEE_MOD,MASSE_NETTE,NB_CYL,CYL_VEH,NB_ESIEU_MAX,COUL_ORIG,TYP_CARBU,TYP_DOSS_PERS,PHYS_SEX,PHYS_AGE,REG_ADM,MRC,CG_FIXE
0,2017,2017_0000000001,PAU,AU,TOYOT,COROL,2008.0,1165.0,4.0,1804.0,,GRI,E,P,M,64.0,Montérégie (16),Longueuil (58 ),58227.0
1,2017,2017_0000000002,PAU,AU,TOYOT,ECHO,2005.0,948.0,4.0,1492.0,,GRI,E,P,F,47.0,Montérégie (16),Longueuil (58 ),58227.0
3,2017,2017_0000000004,PAU,AU,INFIN,G35,2005.0,1664.0,6.0,3509.0,,BEI,E,P,M,61.0,Montérégie (16),Longueuil (58 ),58007.0
4,2017,2017_0000000005,CAU,AU,FORD,ECONO,2004.0,2080.0,8.0,5412.0,,BLA,E,M,,,Montérégie (16),Longueuil (58 ),58007.0
6,2017,2017_0000000007,CAU,AU,AUDI,Q5,2011.0,1803.0,4.0,1984.0,,BLA,E,M,,,Montréal (06),Montréal (66 ),66023.0


In [36]:
# Data type
saaq_df[2017].dtypes

AN                     int64
NOSEQ_VEH             object
CLAS                  object
TYP_VEH_CATEG_USA     object
MARQ_VEH              object
MODEL_VEH             object
ANNEE_MOD            float64
MASSE_NETTE          float64
NB_CYL               float64
CYL_VEH              float64
NB_ESIEU_MAX         float64
COUL_ORIG             object
TYP_CARBU             object
TYP_DOSS_PERS         object
PHYS_SEX              object
PHYS_AGE             float64
REG_ADM               object
MRC                   object
CG_FIXE              float64
dtype: object

In [37]:
# Missing values
saaq_df[2017].isna().sum()

AN                         0
NOSEQ_VEH                  0
CLAS                       0
TYP_VEH_CATEG_USA          0
MARQ_VEH                 432
MODEL_VEH               5530
ANNEE_MOD                  0
MASSE_NETTE             1039
NB_CYL                 15855
CYL_VEH                25350
NB_ESIEU_MAX         5231017
COUL_ORIG             434993
TYP_CARBU                  2
TYP_DOSS_PERS              0
PHYS_SEX              530324
PHYS_AGE              530324
REG_ADM                21594
MRC                    21594
CG_FIXE                21594
dtype: int64

In [38]:
# Delete missing values
for year, df in saaq_df.items():
    saaq_df[year] = df.dropna(subset='MODEL_VEH')
# Pour l'instant on delete les modèle, mais éventuellement, il pourrait être intéressant de déterminer quelle marque le véhicule en utilisant ses caractéristiques

In [40]:
saaq_df[2017].isna().sum()

AN                         0
NOSEQ_VEH                  0
CLAS                       0
TYP_VEH_CATEG_USA          0
MARQ_VEH                   0
MODEL_VEH                  0
ANNEE_MOD                  0
MASSE_NETTE             1016
NB_CYL                 15757
CYL_VEH                20123
NB_ESIEU_MAX         5225487
COUL_ORIG             430415
TYP_CARBU                  2
TYP_DOSS_PERS              0
PHYS_SEX              529834
PHYS_AGE              529834
REG_ADM                21577
MRC                    21577
CG_FIXE                21577
dtype: int64

*** 
# Estimation des ventes

### <u>À titre de comparatif </u>

<u>Les ventes en 2017</u> <br>
selon : https://www.automedia.ca/ventes-au-quebec-annee-2017/ <br>
Voiture = 187 529 <br>
Camion léger = 274 558 <br>
Total = 462 087 <br>
<br>

<u>Plus récemment</u> <br>
selon : https://www.automedia.ca/stats-ventes-2022-fev-mars-2023/
GRAND TOTAL DES VENTES DE VÉHICULES NEUFS AU QUÉBEC : 2022 = 369982, 2023 = 406980

selon : https://www.protegez-vous.ca/nouvelles/automobile/palmares-des-ventes-de-vehicules-neufs-en-2022#:~:text=Ainsi%2C%20contrairement%20%C3%A0%202021%2C%20les,recul%20de%2010%2C9%20%25. <br>
« Ainsi, contrairement à 2021, les ventes totales de véhicules neufs ont baissé en 2022. Elles sont passées de 400 844 à 357 030 unités »
***

#### Temporairement, prendre les modèle de l'année comme vente pour permettre de continuer le projet

In [63]:
# Garder les modèles les plus récents comme vente pour chaque année
vente_year = [2017, 2018]
vente_data = {}

for year in vente_year:
    vente_data[year] = saaq_df[year].loc[saaq_df[year]['ANNEE_MOD']>=year].reset_index(drop=True)
    print(f'Nombre de vente en {year} : {len(vente_data[year])}')
    vente_data[year].to_hdf(f'{path_df}vente_data_{year}.h5', key='s')

Nombre de vente en 2017 : 496045


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->Index(['NOSEQ_VEH', 'CLAS', 'TYP_VEH_CATEG_USA', 'MARQ_VEH', 'MODEL_VEH',
       'COUL_ORIG', 'TYP_CARBU', 'TYP_DOSS_PERS', 'PHYS_SEX', 'REG_ADM',
       'MRC'],
      dtype='object')]

  vente_data[year].to_hdf(f'{path_df}vente_data_{year}.h5', key='s')


Nombre de vente en 2018 : 477643


***

In [10]:
# vente =  vé_t - vé_t-1
# Puique qu'on a pas d'info sur le vente usagés, on va se concentrer sur les ventes neufs
# On estime que les ventes neuves sont tous les véhicules de l'année des données
saaq_2017 = saaq_df[2017].loc[saaq_df[2017]['ANNEE_MOD'] >= 2017]
saaq_2018 = saaq_df[2018].loc[saaq_df[2018]['ANNEE_MOD'] >= 2017]
# Note : si on prend juste les années modèle >= 2017, on surestime les ventes

In [26]:
dfs = [saaq_df[2017], saaq_df[2018]]
for df in dfs:
    print(len(df))

5253968
5253968


In [24]:
print(f'Nombre d\'observation df_2017 : {len(saaq_2017)}')
print(f'Nombre d\'observation df_2018 : {len(saaq_2018)}')
print('Différence : ', len(saaq2018) - len(saaq2017))

Nombre d'observation df_2017 : 914952
Nombre d'observation df_2018 : 914952
Différence :  55788


In [13]:
saaq_2017.head(2)

Unnamed: 0,AN,NOSEQ_VEH,CLAS,TYP_VEH_CATEG_USA,MARQ_VEH,MODEL_VEH,ANNEE_MOD,MASSE_NETTE,NB_CYL,CYL_VEH,NB_ESIEU_MAX,COUL_ORIG,TYP_CARBU,TYP_DOSS_PERS,PHYS_SEX,PHYS_AGE,REG_ADM,MRC,CG_FIXE
10,2018,2018_0000000011,PAU,AU,BMW,X3,2018.0,1885.0,4.0,2001.0,,BLA,E,P,F,50.0,Capitale-Nationale (03),La Jacques-Cartier (22 ),22015.0
19,2018,2018_0000000020,PAU,AU,FORD,F150,2018.0,2150.0,6.0,2706.0,,NOI,E,P,M,34.0,Lanaudière (14),Joliette (61 ),61050.0


In [14]:
saaq_2017_key = saaq_2017
saaq_2018_key = saaq_2018

# Add column key for merging
# columns_to_join = ['CLAS', 'MARQ_VEH', 'MODEL_VEH', 'ANNEE_MOD', 'MASSE_NETTE', 'NB_CYL', 'CYL_VEH', 'COUL_ORIG', 'TYP_CARBU', 'TYP_DOSS_PERS', 'PHYS_SEX', 'PHYS_AGE', 'REG_ADM', 'MRC', 'CG_FIXE']
columns_to_join = ['CLAS', 'MARQ_VEH', 'MODEL_VEH', 'ANNEE_MOD', 'MASSE_NETTE', 'NB_CYL', 'CYL_VEH', 'COUL_ORIG', 'TYP_CARBU', 'CG_FIXE']


def join_columns(row):
    return '_'.join(str(row[col]) for col in columns_to_join)

saaq_2017_key['merge'] = saaq_2017_key.apply(join_columns, axis=1)
saaq_2018_key['merge'] = saaq_2018_key.apply(join_columns, axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  saaq_2017_key['merge'] = saaq_2017_key.apply(join_columns, axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  saaq_2018_key['merge'] = saaq_2018_key.apply(join_columns, axis=1)


In [20]:
saaq_2017_key_merge = saaq_2017_key.filter(['merge'])
saaq_2018_key_merge = saaq_2018_key.filter(['merge'])

In [22]:
print(len(saaq_2017_key_merge))
print(len(saaq_2018_key_merge))

496045
914952

914952
914952


914952

In [17]:
group_2017 = saaq_2017_key_merge
group_2017['count_column'] = group_2017.groupby('merge')['merge'].transform('count')
group_2017 = group_2017.drop_duplicates(subset='merge')
print(group_2017['count_column'].sum())
group_2017

914952


Unnamed: 0,merge,count_column
10,PAU_BMW_X3_2018.0_1885.0_4.0_2001.0_BLA_E_22015.0,1
19,PAU_FORD_F150_2018.0_2150.0_6.0_2706.0_NOI_E_6...,1
22,PAU_NISSA_KICKS_2019.0_1430.0_4.0_1607.0_ROU_E...,1
29,PAU_ACURA_RDX_2018.0_1703.0_6.0_3509.0_NOI_E_2...,7
32,PAU_HYUND_SANTA_2017.0_1681.0_4.0_2361.0_GRI_E...,1
...,...,...
6608227,PAU_NISSA_FRONT_2017.0_1884.0_6.0_4001.0_BLA_E...,1
6608230,PAU_HONDA_CIVIC_2018.0_1224.0_4.0_2001.0_ARG_E...,1
6608251,CAU_CHEVR_SILVE_2018.0_2473.0_8.0_5297.0_BLA_E...,1
6608262,PAU_DODGE_GRAND_2018.0_1971.0_6.0_3608.0_BLA_E...,1


In [18]:
group_2018 = saaq_2018_key_merge
group_2018['count_column'] = group_2018.groupby('merge')['merge'].transform('count')
group_2018 = group_2018.drop_duplicates(subset='merge')
print(group_2018['count_column'].sum())
group_2017

914952


Unnamed: 0,merge,count_column
10,PAU_BMW_X3_2018.0_1885.0_4.0_2001.0_BLA_E_22015.0,1
19,PAU_FORD_F150_2018.0_2150.0_6.0_2706.0_NOI_E_6...,1
22,PAU_NISSA_KICKS_2019.0_1430.0_4.0_1607.0_ROU_E...,1
29,PAU_ACURA_RDX_2018.0_1703.0_6.0_3509.0_NOI_E_2...,7
32,PAU_HYUND_SANTA_2017.0_1681.0_4.0_2361.0_GRI_E...,1
...,...,...
6608227,PAU_NISSA_FRONT_2017.0_1884.0_6.0_4001.0_BLA_E...,1
6608230,PAU_HONDA_CIVIC_2018.0_1224.0_4.0_2001.0_ARG_E...,1
6608251,CAU_CHEVR_SILVE_2018.0_2473.0_8.0_5297.0_BLA_E...,1
6608262,PAU_DODGE_GRAND_2018.0_1971.0_6.0_3608.0_BLA_E...,1


In [19]:
# df_merge = group_2018.merge(group_2017, on='merge',indicator=True, suffixes=('_2018', '_2017'))
df_merge = pd.merge(group_2017, group_2018, how='outer', left_on='merge', right_on='merge', indicator=True)

df_merge

Unnamed: 0,merge,count_column_x,count_column_y,_merge
0,PAU_BMW_X3_2018.0_1885.0_4.0_2001.0_BLA_E_22015.0,1,1,both
1,PAU_FORD_F150_2018.0_2150.0_6.0_2706.0_NOI_E_6...,1,1,both
2,PAU_NISSA_KICKS_2019.0_1430.0_4.0_1607.0_ROU_E...,1,1,both
3,PAU_ACURA_RDX_2018.0_1703.0_6.0_3509.0_NOI_E_2...,7,7,both
4,PAU_HYUND_SANTA_2017.0_1681.0_4.0_2361.0_GRI_E...,1,1,both
...,...,...,...,...
505330,PAU_NISSA_FRONT_2017.0_1884.0_6.0_4001.0_BLA_E...,1,1,both
505331,PAU_HONDA_CIVIC_2018.0_1224.0_4.0_2001.0_ARG_E...,1,1,both
505332,CAU_CHEVR_SILVE_2018.0_2473.0_8.0_5297.0_BLA_E...,1,1,both
505333,PAU_DODGE_GRAND_2018.0_1971.0_6.0_3608.0_BLA_E...,1,1,both


In [106]:
df_merge.to_excel(path_output + 'df_merge.xlsx')

In [105]:
print(df_merge.loc[df_merge['_merge']=='both'].count_column_y.sum())

537331.0


In [107]:
print(len(df_merge.loc[df_merge['_merge']=='both']))
print(len(df_merge.loc[df_merge['_merge']!='both']))

241793
298258


In [108]:
print(df_merge['count_column_x'].sum())
print(df_merge['count_column_y'].sum())

496045.0
914952.0


In [112]:
# Ce qui est dans 2018 qui ne se retrouve pas dans 2017 serait les ventes
vente_2018 = df_merge.loc[df_merge['_merge']=='right_only']
print(vente_2018['count_column_y'].sum())
# vente_2018.to_excel(path_output + 'vente_2018.xlsx')
vente_2018

377621.0


Unnamed: 0,merge,count_column_x,count_column_y,_merge
276509,PAU_BMW_X3_2018.0_1885.0_4.0_2001.0_BLA_E_22015.0,,1.0,right_only
276510,PAU_FORD_F150_2018.0_2150.0_6.0_2706.0_NOI_E_6...,,1.0,right_only
276511,PAU_NISSA_KICKS_2019.0_1430.0_4.0_1607.0_ROU_E...,,1.0,right_only
276512,CAU_CHEVR_MALIB_2018.0_1401.0_4.0_1509.0_BLA_E...,,58.0,right_only
276513,PAU_FORD_EDGE_2018.0_1844.0_6.0_3509.0_NOI_E_8...,,6.0,right_only
...,...,...,...,...
540046,PAU_CHEVR_TAHOE_2017.0_2514.0_8.0_5297.0_NOI_E...,,1.0,right_only
540047,PAU_HONDA_CIVIC_2018.0_1224.0_4.0_2001.0_ARG_E...,,1.0,right_only
540048,CAU_CHEVR_SILVE_2018.0_2473.0_8.0_5297.0_BLA_E...,,1.0,right_only
540049,PAU_DODGE_GRAND_2018.0_1971.0_6.0_3608.0_BLA_E...,,1.0,right_only


In [115]:
saaq_2018_key.head(2)

Unnamed: 0,AN,NOSEQ_VEH,CLAS,TYP_VEH_CATEG_USA,MARQ_VEH,MODEL_VEH,ANNEE_MOD,MASSE_NETTE,NB_CYL,CYL_VEH,NB_ESIEU_MAX,COUL_ORIG,TYP_CARBU,TYP_DOSS_PERS,PHYS_SEX,PHYS_AGE,REG_ADM,MRC,CG_FIXE,merge
10,2018,2018_0000000011,PAU,AU,BMW,X3,2018.0,1885.0,4.0,2001.0,,BLA,E,P,F,50.0,Capitale-Nationale (03),La Jacques-Cartier (22 ),22015.0,PAU_BMW_X3_2018.0_1885.0_4.0_2001.0_BLA_E_22015.0
19,2018,2018_0000000020,PAU,AU,FORD,F150,2018.0,2150.0,6.0,2706.0,,NOI,E,P,M,34.0,Lanaudière (14),Joliette (61 ),61050.0,PAU_FORD_F150_2018.0_2150.0_6.0_2706.0_NOI_E_6...


In [123]:
vente_2018.drop(columns='_merge').head(2)

Unnamed: 0,merge,count_column_x,count_column_y
276509,PAU_BMW_X3_2018.0_1885.0_4.0_2001.0_BLA_E_22015.0,,1.0
276510,PAU_FORD_F150_2018.0_2150.0_6.0_2706.0_NOI_E_6...,,1.0


In [126]:
# vente_unitaire_2018 = pd.merge(saaq_2018_key, vente_2018, how='outer', left_on='merge', right_on='merge', indicator=True)
vente_unitaire_2018 = pd.merge(saaq_2018_key, vente_2018)
vente_unitaire_2018.head(2)

Unnamed: 0,AN,NOSEQ_VEH,CLAS,TYP_VEH_CATEG_USA,MARQ_VEH,MODEL_VEH,ANNEE_MOD,MASSE_NETTE,NB_CYL,CYL_VEH,NB_ESIEU_MAX,COUL_ORIG,TYP_CARBU,TYP_DOSS_PERS,PHYS_SEX,PHYS_AGE,REG_ADM,MRC,CG_FIXE,merge,count_column_x,count_column_y,_merge
0,2018,2018_0000000011,PAU,AU,BMW,X3,2018.0,1885.0,4.0,2001.0,,BLA,E,P,F,50.0,Capitale-Nationale (03),La Jacques-Cartier (22 ),22015.0,PAU_BMW_X3_2018.0_1885.0_4.0_2001.0_BLA_E_22015.0,,1.0,right_only
1,2018,2018_0000000020,PAU,AU,FORD,F150,2018.0,2150.0,6.0,2706.0,,NOI,E,P,M,34.0,Lanaudière (14),Joliette (61 ),61050.0,PAU_FORD_F150_2018.0_2150.0_6.0_2706.0_NOI_E_6...,,1.0,right_only


In [131]:
vente_unitaire_2018.to_excel(path_output + 'vente_unitaire_2018.xlsx')

In [130]:
len(vente_unitaire_2018.loc[vente_unitaire_2018['_merge'] == 'both'])

0

In [133]:
vente_unitaire_2018_cl = vente_unitaire_2018.drop(columns={'merge', 'count_column_x', 'count_column_y', '_merge'})
vente_unitaire_2018_cl

Unnamed: 0,AN,NOSEQ_VEH,CLAS,TYP_VEH_CATEG_USA,MARQ_VEH,MODEL_VEH,ANNEE_MOD,MASSE_NETTE,NB_CYL,CYL_VEH,NB_ESIEU_MAX,COUL_ORIG,TYP_CARBU,TYP_DOSS_PERS,PHYS_SEX,PHYS_AGE,REG_ADM,MRC,CG_FIXE
0,2018,2018_0000000011,PAU,AU,BMW,X3,2018.0,1885.0,4.0,2001.0,,BLA,E,P,F,50.0,Capitale-Nationale (03),La Jacques-Cartier (22 ),22015.0
1,2018,2018_0000000020,PAU,AU,FORD,F150,2018.0,2150.0,6.0,2706.0,,NOI,E,P,M,34.0,Lanaudière (14),Joliette (61 ),61050.0
2,2018,2018_0000000023,PAU,AU,NISSA,KICKS,2019.0,1430.0,4.0,1607.0,,ROU,E,P,M,48.0,Estrie (05),Sherbrooke (43 ),43027.0
3,2018,2018_0000000045,CAU,AU,CHEVR,MALIB,2018.0,1401.0,4.0,1509.0,,BLA,E,M,,,Montréal (06),Montréal (66 ),66072.0
4,2018,2018_0000341895,CAU,AU,CHEVR,MALIB,2018.0,1401.0,4.0,1509.0,,BLA,E,M,,,Montréal (06),Montréal (66 ),66072.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377616,2018,2018_0006608202,PAU,AU,CHEVR,TAHOE,2017.0,2514.0,8.0,5297.0,,NOI,E,P,M,44.0,Montérégie (16),Vaudreuil-Soulanges (71 ),71133.0
377617,2018,2018_0006608231,PAU,AU,HONDA,CIVIC,2018.0,1224.0,4.0,2001.0,,ARG,E,P,F,46.0,Chaudière-Appalaches (12),Robert-Cliche (27 ),27008.0
377618,2018,2018_0006608252,CAU,AU,CHEVR,SILVE,2018.0,2473.0,8.0,5297.0,,BLA,E,M,,,Montréal (06),Montréal (66 ),66023.0
377619,2018,2018_0006608263,PAU,AU,DODGE,GRAND,2018.0,1971.0,6.0,3608.0,,BLA,E,P,M,33.0,Côte-Nord (09),La Haute-Côte-Nord (95 ),95025.0


In [134]:
vente_unitaire_2018_cl.ANNEE_MOD.unique()

array([2018., 2019., 2017.])

In [None]:
# Vente par région
# age des acheteurs
# top 20 des véhicules les plus achetés
# type de carburant (nb VEB et VHB)