In [1]:
import altair as alt
import pandas as pd

# Read the breed-traits table (one row per breed) from the shared data folder.
traits_raw_df = pd.read_csv(filepath_or_buffer="../data/breed_traits.csv")

# Bare last expression -> rich (truncated) DataFrame preview.
traits_raw_df

Unnamed: 0,Breed,Affectionate With Family,Good With Young Children,Good With Other Dogs,Shedding Level,Coat Grooming Frequency,Drooling Level,Coat Type,Coat Length,Openness To Strangers,Playfulness Level,Watchdog/Protective Nature,Adaptability Level,Trainability Level,Energy Level,Barking Level,Mental Stimulation Needs
0,Retrievers (Labrador),5,5,5,4,2,2,Double,Short,5,5,3,5,5,5,3,4
1,French Bulldogs,5,5,4,3,1,3,Smooth,Short,5,5,3,5,4,3,1,3
2,German Shepherd Dogs,5,5,3,4,2,2,Double,Medium,3,4,5,5,5,5,3,5
3,Retrievers (Golden),5,5,5,4,2,2,Double,Medium,5,4,3,5,5,3,1,4
4,Bulldogs,4,3,3,3,3,3,Smooth,Short,4,4,3,3,4,3,2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190,Cesky Terriers,4,5,3,2,2,1,Wavy,Medium,4,3,3,4,3,3,3,3
191,American Foxhounds,3,5,5,3,1,1,Smooth,Short,3,3,3,3,3,4,5,3
192,Azawakhs,3,3,3,2,2,1,Smooth,Short,1,3,3,3,2,3,1,3
193,English Foxhounds,5,5,5,3,1,2,Double,Short,4,4,3,4,4,4,5,4


In [2]:
# Read the breed-rank table (yearly rank columns plus link/image URLs) from the shared data folder.
breed_rank_raw_df = pd.read_csv(filepath_or_buffer="../data/breed_rank.csv")

# Bare last expression -> rich (truncated) DataFrame preview.
breed_rank_raw_df

Unnamed: 0,Breed,2013 Rank,2014 Rank,2015 Rank,2016 Rank,2017 Rank,2018 Rank,2019 Rank,2020 Rank,links,Image
0,Retrievers (Labrador),1.0,1.0,1.0,1.0,1.0,1.0,1.0,1,https://www.akc.org/dog-breeds/labrador-retrie...,https://www.akc.org/wp-content/uploads/2017/11...
1,French Bulldogs,11.0,9.0,6.0,6.0,4.0,4.0,4.0,2,https://www.akc.org/dog-breeds/french-bulldog/,https://www.akc.org/wp-content/uploads/2017/11...
2,German Shepherd Dogs,2.0,2.0,2.0,2.0,2.0,2.0,2.0,3,https://www.akc.org/dog-breeds/german-shepherd...,https://www.akc.org/wp-content/uploads/2017/11...
3,Retrievers (Golden),3.0,3.0,3.0,3.0,3.0,3.0,3.0,4,https://www.akc.org/dog-breeds/golden-retriever/,https://www.akc.org/wp-content/uploads/2017/11...
4,Bulldogs,5.0,4.0,4.0,4.0,5.0,5.0,5.0,5,https://www.akc.org/dog-breeds/bulldog/,https://www.akc.org/wp-content/uploads/2017/11...
...,...,...,...,...,...,...,...,...,...,...,...
190,Cesky Terriers,174.0,182.0,179.0,182.0,185.0,185.0,190.0,191,https://www.akc.org/dog-breeds/cesky-terrier/,https://www.akc.org/wp-content/uploads/2017/11...
191,American Foxhounds,176.0,180.0,181.0,189.0,187.0,186.0,191.0,192,https://www.akc.org/dog-breeds/american-foxhound/,https://www.akc.org/wp-content/uploads/2017/11...
192,Azawakhs,,,,,,,,193,https://www.akc.org/dog-breeds/azawakh/,https://www.akc.org/wp-content/uploads/2017/11...
193,English Foxhounds,177.0,183.0,184.0,187.0,189.0,188.0,195.0,194,https://www.akc.org/dog-breeds/english-foxhound/,https://www.akc.org/wp-content/uploads/2017/11...


In [3]:
# Placeholder user selection: these static values should eventually be supplied
# by the dashboard's checkboxes and sliders.
# Each selected trait is paired with the weight applied to it when scoring breeds.
trait_weight_by_name = {
    'Affectionate With Family': 5,
    'Good With Young Children': 5,
    'Good With Other Dogs': -1,
    'Shedding Level': -1,
}

# The scoring function takes two positionally aligned lists.
traits_list = list(trait_weight_by_name)
traits_weights = list(trait_weight_by_name.values())

In [4]:
def top_5_plot_func(traits_raw_df, traits_list, traits_weights, breed_rank_raw_df):
    """
    Returns an altair object of a bar chart of top 5 dog breeds.

    Each breed's score is the weighted sum of its values in ``traits_list``:
    the i-th trait is multiplied by the i-th entry of ``traits_weights``.
    The five highest-scoring breeds are then left-joined to the rank table on
    their row label (exposed as the ``BreedID`` column). The join is purely on
    row labels, so it assumes both dataframes describe the same breed under the
    same row label.

    Neither input dataframe is modified.

    Parameters:
        traits_raw_df (pandas.DataFrame): a dataframe of breed traits; must contain
            every column named in traits_list
        traits_list (list): the list of traits that the users have specified
        traits_weights (list): the list of weights of the traits, positionally
            aligned with traits_list (entries beyond len(traits_list) are unused)
        breed_rank_raw_df (pandas.DataFrame): a dataframe of breed ranks; must
            contain a 'Breed' column, which is used to label the bars

    Returns:
        top_5_plot (altair): altair chart object of the top 5 bar charts
        top_5_df (pandas.DataFrame): the dataframe with the top 5 breeds, the traits, the total
            scores and every column of breed_rank_raw_df (e.g. ranks and links to the images).
    """
    # Build fresh frames keyed by the row label instead of writing a "BreedID"
    # column into the caller's dataframes.
    traits_df = traits_raw_df[traits_list].rename_axis('BreedID')
    breed_rank_df = breed_rank_raw_df.assign(BreedID=breed_rank_raw_df.index)

    # Accumulate the weighted sum one trait at a time.
    traits_df['score'] = 0
    for position, trait in enumerate(traits_list):
        traits_df['score'] += traits_df[trait] * traits_weights[position]

    # Keep the five best scores, then attach the rank-table columns for those breeds.
    top_5_df = (
        traits_df
        .sort_values('score', ascending=False)
        .head(5)
        .merge(breed_rank_df, how='left', on='BreedID')
    )

    # Horizontal bars, ordered by descending score.
    top_5_plot = alt.Chart(top_5_df).mark_bar().encode(
        x=alt.X('score:Q'),
        y=alt.Y('Breed:N', sort='-x')
    )

    return top_5_plot, top_5_df

In [5]:
# Score the breeds with the static selection above and keep both the chart and its backing table.
top_5_plot, top_5_df = top_5_plot_func(
    traits_raw_df=traits_raw_df,
    traits_list=traits_list,
    traits_weights=traits_weights,
    breed_rank_raw_df=breed_rank_raw_df,
)

top_5_plot

traits_list=['Affectionate With Family', 'Good With Young Children', 'Good With Other Dogs', 'Shedding Level']


In [6]:
# Inspect the table behind the bar chart: the five top-scoring breeds with their
# selected trait values, total score and the columns joined in from the rank table.
top_5_df

Unnamed: 0,BreedID,Affectionate With Family,Good With Young Children,Good With Other Dogs,Shedding Level,score,Breed,2013 Rank,2014 Rank,2015 Rank,2016 Rank,2017 Rank,2018 Rank,2019 Rank,2020 Rank,links,Image
0,119,5,5,1,2,47,Irish Terriers,123.0,125.0,113.0,115.0,114.0,116.0,121.0,120,https://www.akc.org/dog-breeds/irish-terrier/,https://www.akc.org/wp-content/uploads/2017/11...
1,57,5,5,3,1,46,Soft Coated Wheaten Terriers,51.0,49.0,47.0,50.0,49.0,53.0,54.0,58,https://www.akc.org/dog-breeds/soft-coated-whe...,https://www.akc.org/wp-content/uploads/2017/11...
2,128,5,5,3,1,46,American Hairless Terriers,,,,,,,,129,https://www.akc.org/dog-breeds/american-hairle...,https://www.akc.org/wp-content/uploads/2017/11...
3,91,5,5,3,1,46,Standard Schnauzers,90.0,90.0,91.0,85.0,90.0,89.0,99.0,92,https://www.akc.org/dog-breeds/standard-schnau...,https://www.akc.org/wp-content/uploads/2017/11...
4,5,5,5,3,1,46,Poodles,8.0,7.0,8.0,7.0,7.0,7.0,6.0,6,https://www.akc.org/dog-breeds/poodle-standard/,https://www.akc.org/wp-content/uploads/2017/11...


In [7]:
# Year labels ("2013" ... "2020") that replace the "<year> Rank" column names.
col_list = [str(year) for year in range(2013, 2021)]

# Rename on a copy so top_5_df keeps the columns shown in the earlier cell
# and this cell gives the same result every time it is re-run.
top_5_by_year_df = top_5_df.rename(
    columns={f"{year} Rank": year for year in col_list}
)

# Long format: one row per (breed, year) holding that year's rank.
top_5_rank_df = top_5_by_year_df.melt(
    id_vars=['Breed', 'BreedID', 'score'],
    value_vars=col_list,
    var_name='Rank year',
    value_name='Rank',
)

base_plot = alt.Chart(top_5_rank_df).encode(
    # Reversed y scale: numerically smaller ranks are drawn higher up.
    y=alt.Y('Rank:Q', scale=alt.Scale(zero=False, reverse=True)),
    # '.0f' prints the year ticks as plain integers.
    x=alt.X('Rank year:Q', axis=alt.Axis(format='.0f')),
    color='Breed',
    tooltip=['Breed', 'score', 'Rank year', 'Rank']
)

# Lines show the trend; filled points mark the individual yearly values.
line_plot = base_plot.mark_line()

point_plot = base_plot.mark_point(filled=True)

top_5_rank_plot = line_plot + point_plot

top_5_rank_plot
