In [1]:
# Pandas is used for data manipulation
import pandas as pd
# Read in data and display first 5 rows
dogs = pd.read_csv('D:/projects/20151001hundehalter.csv')
dogs.head(5)

Unnamed: 0,HALTER_ID,ALTER,GESCHLECHT,STADTKREIS,STADTQUARTIER,RASSE1,RASSE1_MISCHLING,RASSE2,RASSE2_MISCHLING,RASSENTYP,GEBURTSJAHR_HUND,GESCHLECHT_HUND,HUNDEFARBE
0,126,51-60,m,9.0,92.0,Welsh Terrier,,,,K,2011,w,schwarz/braun
1,574,61-70,w,2.0,23.0,Cairn Terrier,,,,K,2002,w,brindle
2,695,41-50,m,6.0,63.0,Labrador Retriever,,,,I,2012,w,braun
3,893,61-70,w,7.0,71.0,Mittelschnauzer,,,,I,2010,w,schwarz
4,1177,51-60,m,10.0,102.0,Shih Tzu,,,,K,2011,m,schwarz/weiss


In [2]:
print('The shape of our features is:', dogs.shape)

The shape of our features is: (6980, 13)


In [3]:
dogs = dogs[["HALTER_ID", 'ALTER', 'GESCHLECHT', 'STADTKREIS', 'STADTQUARTIER','RASSE1','GESCHLECHT_HUND','HUNDEFARBE']]

In [4]:
dogs = dogs.rename(columns={'HALTER_ID': 'HOLDER_ID',
                            'ALTER': 'AGE',
                            'GESCHLECHT': 'GENDER',
                            'STADTKREIS': 'CITY DISTRICT',
                            'STADTQUARTIER': 'CITY QUARTER',
                            'RASSE1':'BREED',
                            'GESCHLECHT_HUND': 'DOG GENDER',
                            'HUNDEFARBE': 'COLOR',})
dogs.head(5)

Unnamed: 0,HOLDER_ID,AGE,GENDER,CITY DISTRICT,CITY QUARTER,BREED,DOG GENDER,COLOR
0,126,51-60,m,9.0,92.0,Welsh Terrier,w,schwarz/braun
1,574,61-70,w,2.0,23.0,Cairn Terrier,w,brindle
2,695,41-50,m,6.0,63.0,Labrador Retriever,w,braun
3,893,61-70,w,7.0,71.0,Mittelschnauzer,w,schwarz
4,1177,51-60,m,10.0,102.0,Shih Tzu,m,schwarz/weiss


## Ranking colors by most popular in descending order

In [5]:
colors = dogs.groupby('COLOR').size().sort_values(ascending=False)
colors.head(10)

COLOR
schwarz          764
tricolor         669
weiss            544
braun            530
schwarz/weiss    443
schwarz/braun    363
beige            342
braun/weiss      215
weiss/braun      196
black/tan        165
dtype: int64

## Creating correlation table between color words to find closest matches

### First try using Levenshtein distance

In [6]:
from Levenshtein import distance as levenshtein_distance

lev = pd.DataFrame(index = colors.index, columns = colors.index) #creating 2d array of to show correlation of words

for x in colors.index:
    for y in colors.index:
        lev.loc[x,y] = levenshtein_distance(x,y)
        
lev.head(10)
                        

COLOR,schwarz,tricolor,weiss,braun,schwarz/weiss,schwarz/braun,beige,braun/weiss,weiss/braun,black/tan,...,grau/weiss/braun,braun/weiss/beige,grau/weiss/beige,grau/braun/weiss,rot/grau,gemischt,silber/weiss,gelb/braun,creme/schwarz,Harlekin
COLOR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
schwarz,0,7,7,6,6,6,7,10,9,8,...,14,16,15,15,7,8,10,9,6,8
tricolor,7,0,7,7,12,12,7,10,9,8,...,13,15,14,15,8,8,11,10,10,8
weiss,7,7,0,5,8,12,3,6,6,9,...,11,12,11,11,8,5,7,9,11,7
braun,6,7,5,0,12,8,4,6,6,6,...,11,12,13,11,6,8,10,5,11,6
schwarz/weiss,6,12,8,12,0,5,11,6,12,11,...,12,12,12,9,12,13,5,12,12,10
schwarz/braun,6,12,12,8,5,0,12,11,7,9,...,9,14,13,12,9,13,10,7,12,10
beige,7,7,3,4,11,12,0,8,9,8,...,14,12,11,13,7,6,9,9,12,7
braun/weiss,10,10,6,6,6,11,8,0,10,8,...,8,6,8,5,9,11,6,10,11,9
weiss/braun,9,9,6,6,12,7,9,10,0,8,...,5,10,9,11,7,9,11,4,10,10
black/tan,8,8,9,6,11,9,8,8,8,0,...,12,14,14,13,8,9,10,8,11,7


#### Testing how well it matches a specific color, lower distance is better

In [7]:
levlist = lev["braun/schwarz/weiss"].sort_values()
levlist.head(20)

COLOR
braun/schwarz/weiss        0
braun/schwarz/grau         5
schwarz/weiss              6
braun/schwarz              6
braun/rot/weiss            7
braun/weiss                8
blau/grau/weiss            8
grau/braun/weiss           8
grau/schwarz               8
braun schwarz gestromt     9
schwarz/beige              9
rot/schwarz               10
schwarz/gelb              10
rot/braun/weiss           10
braun schimmel            10
creme/schwarz             10
beige/grau/weiss          10
grau/weiss                10
blau/weiss                10
weiss/schwarz/braun       10
Name: braun/schwarz/weiss, dtype: object

#### not good, weiss/schwarz/braun is considered farther away despite representing the same color

### trying SequenceMatcher to find word correlation, maybe it will do a better job

In [8]:
from difflib import SequenceMatcher

matcher = pd.DataFrame(index = colors.index, columns = colors.index)

for x in colors.index:
    for y in colors.index:
        matcher.loc[x,y] = SequenceMatcher(None, x, y).ratio()
                        

In [9]:
matchlist = matcher["braun/schwarz/weiss"].sort_values(ascending = False)
matchlist.head(20)

COLOR
braun/schwarz/weiss            1.0
schwarz/weiss               0.8125
braun/schwarz               0.8125
braun/rot/weiss           0.764706
braun/schwarz/grau        0.756757
braun/weiss               0.733333
grau/schwarz              0.709677
schwarz/grau/weiss        0.702703
grau/braun/schwarz        0.702703
schwarz/braun/weiss       0.684211
braun/weiss/schwarz       0.684211
schwarz/weiss/braun       0.684211
beige/braun/schwarz       0.684211
schwarz/weiss/beige       0.684211
weiss/braun/schwarz       0.684211
braun schwarz gestromt    0.682927
rotbraun/weiss            0.666667
hellbraun/weiss           0.647059
rot/braun/weiss           0.647059
grau/braun/weiss          0.628571
Name: braun/schwarz/weiss, dtype: object

#### Slightly better, the permuations of "braun/schwarz/weiss" are higher in correlation, but still outranked

## Creating my own matcher function that matches all permutations of colors 

#### first making a test function that just prints out correlation

In [10]:
#Function that compares every permuation of the words with slashes
import itertools

def test_permumatch(str1,str2):
    for x in itertools.permutations(str1.split('/')):
        for y in itertools.permutations(str2.split('/')):
            xjoin = '/'.join(x)
            yjoin = '/'.join(y)
            print(xjoin , yjoin , SequenceMatcher(None, xjoin, yjoin).ratio())
    

### testing it out using "red/blue/green"

In [11]:
test_permumatch("red/blue/green","blue/red/green") #showing how the permutation + sequence matching finds similarity

red/blue/green blue/red/green 0.7142857142857143
red/blue/green blue/green/red 0.7142857142857143
red/blue/green red/blue/green 1.0
red/blue/green red/green/blue 0.6428571428571429
red/blue/green green/blue/red 0.7142857142857143
red/blue/green green/red/blue 0.5714285714285714
red/green/blue blue/red/green 0.6428571428571429
red/green/blue blue/green/red 0.6428571428571429
red/green/blue red/blue/green 0.6428571428571429
red/green/blue red/green/blue 1.0
red/green/blue green/blue/red 0.7142857142857143
red/green/blue green/red/blue 0.7142857142857143
blue/red/green blue/red/green 1.0
blue/red/green blue/green/red 0.7142857142857143
blue/red/green red/blue/green 0.7142857142857143
blue/red/green red/green/blue 0.6428571428571429
blue/red/green green/blue/red 0.5714285714285714
blue/red/green green/red/blue 0.5
blue/green/red blue/red/green 0.7142857142857143
blue/green/red blue/green/red 1.0
blue/green/red red/blue/green 0.7142857142857143
blue/green/red red/green/blue 0.64285714285714

#### looks good

### function which returns the best correlation of all permutations of colors

In [12]:
#Function that compares every permuation of the words with slashes
import itertools

def permumatch(str1,str2):
    best_match = 0
    for x in itertools.permutations(str1.split('/')):
        for y in itertools.permutations(str2.split('/')):
            xjoin = '/'.join(x) #split creates a tuple, converting that back into a string
            yjoin = '/'.join(y)
            match = SequenceMatcher(None, xjoin, yjoin).ratio()
            if match > best_match:
                best_match = match
                
    return best_match

    

### Using my new permutation+matcher to create a correlation table of colors

In [13]:
from difflib import SequenceMatcher

supermatcher = pd.DataFrame(index = colors.index, columns = colors.index)

for x in colors.index:
    for y in colors.index:
        supermatcher.loc[x,y] = permumatch(x,y)
                        

In [14]:
supermatchlist = supermatcher["braun/schwarz/weiss"].sort_values(ascending = False)
supermatchlist.head(20)

COLOR
weiss/schwarz/braun         1.0
schwarz/braun/weiss         1.0
schwarz/weiss/braun         1.0
weiss/braun/schwarz         1.0
braun/schwarz/weiss         1.0
braun/weiss/schwarz         1.0
schwarz/grau/weiss     0.918919
grau/weiss/schwarz     0.918919
beige/braun/schwarz    0.842105
schwarz/braun/beige    0.842105
schwarz/weiss            0.8125
schwarz/braun            0.8125
braun/schwarz            0.8125
weiss/schwarz            0.8125
schwarz/weiss/beige    0.789474
schwarz/braun/rot      0.777778
rot/braun/weiss        0.764706
braun/rot/weiss        0.764706
grau/braun/schwarz     0.756757
schwarz/braun/grau     0.756757
Name: braun/schwarz/weiss, dtype: object

#### Looks good, every permutation of "braun/schwarz/weiss" shows up at 1.0 correlation.
##### Aparently it's a popular enough color combination every arrangement showed up in the dog data list

# Here's the final correlation table

In [15]:
supermatcher.head(10)

COLOR,schwarz,tricolor,weiss,braun,schwarz/weiss,schwarz/braun,beige,braun/weiss,weiss/braun,black/tan,...,grau/weiss/braun,braun/weiss/beige,grau/weiss/beige,grau/braun/weiss,rot/grau,gemischt,silber/weiss,gelb/braun,creme/schwarz,Harlekin
COLOR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
schwarz,1.0,0.266667,0.166667,0.166667,0.7,0.7,0.0,0.222222,0.222222,0.25,...,0.26087,0.166667,0.173913,0.26087,0.266667,0.4,0.210526,0.117647,0.7,0.266667
tricolor,0.133333,1.0,0.153846,0.153846,0.190476,0.285714,0.153846,0.210526,0.210526,0.235294,...,0.25,0.16,0.166667,0.25,0.25,0.125,0.2,0.222222,0.285714,0.25
weiss,0.166667,0.153846,1.0,0.0,0.555556,0.111111,0.4,0.625,0.625,0.0,...,0.47619,0.454545,0.47619,0.47619,0.0,0.461538,0.588235,0.133333,0.222222,0.307692
braun,0.166667,0.153846,0.0,1.0,0.111111,0.555556,0.2,0.625,0.625,0.428571,...,0.47619,0.454545,0.380952,0.47619,0.461538,0.0,0.235294,0.666667,0.222222,0.307692
schwarz/weiss,0.7,0.285714,0.555556,0.111111,1.0,0.615385,0.222222,0.583333,0.583333,0.181818,...,0.551724,0.466667,0.482759,0.551724,0.190476,0.47619,0.64,0.26087,0.692308,0.380952
schwarz/braun,0.7,0.285714,0.111111,0.555556,0.615385,1.0,0.111111,0.583333,0.583333,0.363636,...,0.551724,0.466667,0.413793,0.551724,0.47619,0.285714,0.32,0.521739,0.692308,0.285714
beige,0.0,0.153846,0.4,0.2,0.222222,0.111111,1.0,0.375,0.375,0.142857,...,0.380952,0.454545,0.47619,0.380952,0.153846,0.307692,0.352941,0.4,0.222222,0.307692
braun/weiss,0.222222,0.210526,0.625,0.625,0.583333,0.583333,0.375,1.0,1.0,0.3,...,0.814815,0.785714,0.740741,0.814815,0.421053,0.315789,0.695652,0.666667,0.25,0.315789
weiss/braun,0.222222,0.210526,0.625,0.625,0.583333,0.583333,0.375,1.0,1.0,0.3,...,0.814815,0.785714,0.740741,0.814815,0.421053,0.315789,0.695652,0.666667,0.25,0.315789
black/tan,0.125,0.235294,0.0,0.428571,0.181818,0.363636,0.142857,0.3,0.3,1.0,...,0.32,0.307692,0.24,0.32,0.352941,0.235294,0.190476,0.421053,0.272727,0.352941


## Using correlation table to reduce the number of colors

In [29]:
color_dogs = dogs

for x in colors.index:
    for y in colors.index:
        if supermatcher.loc[x,y] == 1 and x > y:
            for idx, val in enumerate(color_dogs['COLOR']): 
                if val == y:
                    print(idx, val, "------->", x)
            
            

8 braun/schwarz -------> schwarz/braun
12 braun/schwarz -------> schwarz/braun
80 braun/schwarz -------> schwarz/braun
213 braun/schwarz -------> schwarz/braun
239 braun/schwarz -------> schwarz/braun
271 braun/schwarz -------> schwarz/braun
286 braun/schwarz -------> schwarz/braun
456 braun/schwarz -------> schwarz/braun
503 braun/schwarz -------> schwarz/braun
638 braun/schwarz -------> schwarz/braun
738 braun/schwarz -------> schwarz/braun
817 braun/schwarz -------> schwarz/braun
821 braun/schwarz -------> schwarz/braun
848 braun/schwarz -------> schwarz/braun
889 braun/schwarz -------> schwarz/braun
891 braun/schwarz -------> schwarz/braun
922 braun/schwarz -------> schwarz/braun
1016 braun/schwarz -------> schwarz/braun
1161 braun/schwarz -------> schwarz/braun
1163 braun/schwarz -------> schwarz/braun
1167 braun/schwarz -------> schwarz/braun
1186 braun/schwarz -------> schwarz/braun
1365 braun/schwarz -------> schwarz/braun
1397 braun/schwarz -------> schwarz/braun
1420 braun/sc

4286 schwarz/weiss -------> weiss/schwarz
4287 schwarz/weiss -------> weiss/schwarz
4288 schwarz/weiss -------> weiss/schwarz
4293 schwarz/weiss -------> weiss/schwarz
4297 schwarz/weiss -------> weiss/schwarz
4318 schwarz/weiss -------> weiss/schwarz
4357 schwarz/weiss -------> weiss/schwarz
4366 schwarz/weiss -------> weiss/schwarz
4379 schwarz/weiss -------> weiss/schwarz
4391 schwarz/weiss -------> weiss/schwarz
4393 schwarz/weiss -------> weiss/schwarz
4409 schwarz/weiss -------> weiss/schwarz
4426 schwarz/weiss -------> weiss/schwarz
4458 schwarz/weiss -------> weiss/schwarz
4461 schwarz/weiss -------> weiss/schwarz
4480 schwarz/weiss -------> weiss/schwarz
4500 schwarz/weiss -------> weiss/schwarz
4523 schwarz/weiss -------> weiss/schwarz
4555 schwarz/weiss -------> weiss/schwarz
4578 schwarz/weiss -------> weiss/schwarz
4584 schwarz/weiss -------> weiss/schwarz
4588 schwarz/weiss -------> weiss/schwarz
4593 schwarz/weiss -------> weiss/schwarz
4597 schwarz/weiss -------> weiss/

6277 grau/schwarz -------> schwarz/grau
6320 grau/schwarz -------> schwarz/grau
6629 grau/schwarz -------> schwarz/grau
366 beige/schwarz -------> schwarz/beige
689 beige/schwarz -------> schwarz/beige
795 beige/schwarz -------> schwarz/beige
941 beige/schwarz -------> schwarz/beige
1436 beige/schwarz -------> schwarz/beige
2070 beige/schwarz -------> schwarz/beige
2122 beige/schwarz -------> schwarz/beige
2217 beige/schwarz -------> schwarz/beige
2296 beige/schwarz -------> schwarz/beige
2657 beige/schwarz -------> schwarz/beige
2721 beige/schwarz -------> schwarz/beige
2722 beige/schwarz -------> schwarz/beige
2774 beige/schwarz -------> schwarz/beige
2828 beige/schwarz -------> schwarz/beige
2876 beige/schwarz -------> schwarz/beige
2960 beige/schwarz -------> schwarz/beige
3197 beige/schwarz -------> schwarz/beige
3416 beige/schwarz -------> schwarz/beige
3457 beige/schwarz -------> schwarz/beige
3556 beige/schwarz -------> schwarz/beige
3857 beige/schwarz -------> schwarz/beige
39

2313 schwarz/braun/weiss -------> weiss/braun/schwarz
2437 schwarz/braun/weiss -------> weiss/braun/schwarz
2527 schwarz/braun/weiss -------> weiss/braun/schwarz
2648 schwarz/braun/weiss -------> weiss/braun/schwarz
2667 schwarz/braun/weiss -------> weiss/braun/schwarz
2680 schwarz/braun/weiss -------> weiss/braun/schwarz
2724 schwarz/braun/weiss -------> weiss/braun/schwarz
2875 schwarz/braun/weiss -------> weiss/braun/schwarz
2912 schwarz/braun/weiss -------> weiss/braun/schwarz
2964 schwarz/braun/weiss -------> weiss/braun/schwarz
2989 schwarz/braun/weiss -------> weiss/braun/schwarz
3133 schwarz/braun/weiss -------> weiss/braun/schwarz
3194 schwarz/braun/weiss -------> weiss/braun/schwarz
3213 schwarz/braun/weiss -------> weiss/braun/schwarz
3339 schwarz/braun/weiss -------> weiss/braun/schwarz
3409 schwarz/braun/weiss -------> weiss/braun/schwarz
3426 schwarz/braun/weiss -------> weiss/braun/schwarz
3427 schwarz/braun/weiss -------> weiss/braun/schwarz
3455 schwarz/braun/weiss ---

3773 gelb/weiss -------> weiss/gelb
4833 gelb/weiss -------> weiss/gelb
6378 gelb/weiss -------> weiss/gelb
1236 gold/schwarz -------> schwarz/gold
3853 grau/braun/weiss -------> grau/weiss/braun
2930 beige/weiss/braun -------> braun/weiss/beige
765 beige/grau/weiss -------> grau/weiss/beige
1568 beige/grau/weiss -------> grau/weiss/beige
4671 beige/grau/weiss -------> grau/weiss/beige


In [33]:
for xidx, x in enumerate(colors.index):
    for yidx, y in enumerate(colors.index):
        corr = supermatcher.loc[x,y]
        if corr > 0.9 and corr < 1 and xidx < yidx:
            for idx, val in enumerate(color_dogs['COLOR']): 
                if val == y:
                    print(idx, val, "------->", x)

1311 schwarz/grau/weiss -------> schwarz/braun/weiss
1765 schwarz/grau/weiss -------> schwarz/braun/weiss
1818 schwarz/grau/weiss -------> schwarz/braun/weiss
1881 schwarz/grau/weiss -------> schwarz/braun/weiss
2472 schwarz/grau/weiss -------> schwarz/braun/weiss
2529 schwarz/grau/weiss -------> schwarz/braun/weiss
3077 schwarz/grau/weiss -------> schwarz/braun/weiss
3218 schwarz/grau/weiss -------> schwarz/braun/weiss
6169 schwarz/grau/weiss -------> schwarz/braun/weiss
6803 schwarz/grau/weiss -------> schwarz/braun/weiss
665 grau/weiss/schwarz -------> schwarz/braun/weiss
1778 grau/weiss/schwarz -------> schwarz/braun/weiss
1806 grau/weiss/schwarz -------> schwarz/braun/weiss
3765 grau/weiss/schwarz -------> schwarz/braun/weiss
3904 grau/weiss/schwarz -------> schwarz/braun/weiss
1311 schwarz/grau/weiss -------> schwarz/weiss/braun
1765 schwarz/grau/weiss -------> schwarz/weiss/braun
1818 schwarz/grau/weiss -------> schwarz/weiss/braun
1881 schwarz/grau/weiss -------> schwarz/weiss/

In [17]:
my_series = dogs.duplicated()
my_series

0       False
1       False
2       False
3       False
4       False
        ...  
6975    False
6976    False
6977    False
6978    False
6979    False
Length: 6980, dtype: bool

In [18]:
num_dupes = 0
my_series = dogs.duplicated()
for x in dogs.index:
    if my_series[x]:
        num_dupes+= 1
        
print("There are", num_dupes, "duplicate entries")

There are 82 duplicate entries


In [19]:
#unduped_dogs = dogs.drop_duplicates(subset=['HOLDER_ID'])
#unduped_dogs

unduped_dogs = dogs.drop_duplicates()

In [20]:
#Create function to convert a string of form 'x-y' into a float = average(x,y)
import statistics 

def average_age(txt):
    ages = [int(s) for s in txt.split('-') if s.isdigit()]
    return statistics.mean(ages)

print(average_age("12-18-12-a-234-a-b"))

69


In [21]:
#Make sure AGE values have values and are not empty
dogs = dogs[dogs['AGE'].notnull()]
dogs

Unnamed: 0,HOLDER_ID,AGE,GENDER,CITY DISTRICT,CITY QUARTER,BREED,DOG GENDER,COLOR
0,126,51-60,m,9.0,92.0,Welsh Terrier,w,schwarz/braun
1,574,61-70,w,2.0,23.0,Cairn Terrier,w,brindle
2,695,41-50,m,6.0,63.0,Labrador Retriever,w,braun
3,893,61-70,w,7.0,71.0,Mittelschnauzer,w,schwarz
4,1177,51-60,m,10.0,102.0,Shih Tzu,m,schwarz/weiss
...,...,...,...,...,...,...,...,...
6975,128471,21-30,w,8.0,83.0,Tibetan Spaniel,w,braun/beige
6976,128471,21-30,w,8.0,83.0,Rottweiler,m,schwarz/rot
6977,128479,51-60,w,6.0,63.0,Lagotto Romagnolo,m,weiss/grau
6978,128482,21-30,w,9.0,91.0,Yorkshire Terrier,w,schwarz/braun/grau


In [22]:
averaged_dogs = dogs['AGE'].apply(lambda x: average_age(x))
dogs['AGE'] = averaged_dogs
dogs

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dogs['AGE'] = averaged_dogs


Unnamed: 0,HOLDER_ID,AGE,GENDER,CITY DISTRICT,CITY QUARTER,BREED,DOG GENDER,COLOR
0,126,55.5,m,9.0,92.0,Welsh Terrier,w,schwarz/braun
1,574,65.5,w,2.0,23.0,Cairn Terrier,w,brindle
2,695,45.5,m,6.0,63.0,Labrador Retriever,w,braun
3,893,65.5,w,7.0,71.0,Mittelschnauzer,w,schwarz
4,1177,55.5,m,10.0,102.0,Shih Tzu,m,schwarz/weiss
...,...,...,...,...,...,...,...,...
6975,128471,25.5,w,8.0,83.0,Tibetan Spaniel,w,braun/beige
6976,128471,25.5,w,8.0,83.0,Rottweiler,m,schwarz/rot
6977,128479,55.5,w,6.0,63.0,Lagotto Romagnolo,m,weiss/grau
6978,128482,25.5,w,9.0,91.0,Yorkshire Terrier,w,schwarz/braun/grau
