In [7]:
import numpy as np
import pandas as pd
pd.options.display.max_columns = None

## Data Description

1. __Relevant Information__:
   -- This data file contains details of various nations and their flags.
      In this file the fields are separated by spaces (not commas).  With
      this data you can try things like predicting the religion of a country
      from its size and the colours in its flag.  
   -- 10 attributes are numeric-valued.  The remainder are either Boolean-
      or nominal-valued.

2. Number of Instances: 194

3. Number of attributes: 30 (overall)

4. Attribute Information:
   1. __name__ : Name of the country concerned
   2. __landmass__ :	1=N.America, 2=S.America, 3=Europe, 4=Africa, 5=Asia, 6=Oceania
   3. __zone__ : Geographic quadrant, based on Greenwich and the Equator 1=NE, 2=SE, 3=SW, 4=NW
   4. __area__	in thousands of square km
   5. __population__	in round millions
   6. __language__ 1=English, 2=Spanish, 3=French, 4=German, 5=Slavic, 6=Other Indo-European, 7=Chinese, 8=Arabic, 9=Japanese/Turkish/Finnish/Magyar, 10=Others
   7. __religion__ 0=Catholic, 1=Other Christian, 2=Muslim, 3=Buddhist, 4=Hindu, 5=Ethnic, 6=Marxist, 7=Others
   8. __bars__     Number of vertical bars in the flag
   9. __stripes__  Number of horizontal stripes in the flag
   10. __colours__  Number of different colours in the flag
   11. __red__      0 if red absent, 1 if red present in the flag
   12. __green__    same for green
   13. __blue__     same for blue
   14. __gold__     same for gold (also yellow)
   15. __white__    same for white
   16. __black__    same for black
   17. __orange__   same for orange (also brown)
   18. __mainhue__  predominant colour in the flag (tie-breaks decided by taking the topmost hue, if that fails then the most central hue, and if that fails the leftmost hue)
   19. __circles__  Number of circles in the flag
   20. __crosses__  Number of (upright) crosses
   21. __saltires__ Number of diagonal crosses
   22. __quarters__ Number of quartered sections
   23. __sunstars__ Number of sun or star symbols
   24. __crescent__ 1 if a crescent moon symbol present, else 0
   25. __triangle__ 1 if any triangles present, 0 otherwise
   26. __icon__     1 if an inanimate image present (e.g., a boat), otherwise 0
   27. __animate__  1 if an animate image (e.g., an eagle, a tree, a human hand)
               present, 0 otherwise
   28. __text__     1 if any letters or writing on the flag (e.g., a motto or
               slogan), 0 otherwise
   29. __topleft__  colour in the top-left corner (moving right to decide 
               tie-breaks)
   30. __botright__ Colour in the bottom-left corner (moving left to decide 
               tie-breaks)

In [None]:
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/flags/flag.data', header=None)

In [9]:
column_names = (
    'name', 'landmass', 'zone', 'area', 'population', 
    'language', 'region', 'bars', 'stipes', 'colors', 
    'red', 'green', 'blue', 'gold', 'white', 
    'black', 'orange', 'mainhue', 'circles', 'crosses', 
    'saltires', 'quarters', 'sunstars', 'crescent', 'triangle', 
    'icon', 'animate', 'text', 'topleft', 'botright'
    )

data.columns = column_names
data.head()

Unnamed: 0,name,landmass,zone,area,population,language,region,bars,stipes,colors,red,green,blue,gold,white,black,orange,mainhue,circles,crosses,saltires,quarters,sunstars,crescent,triangle,icon,animate,text,topleft,botright
0,Afghanistan,5,1,648,16,10,2,0,3,5,1,1,0,1,1,1,0,green,0,0,0,0,1,0,0,1,0,0,black,green
1,Albania,3,1,29,3,6,6,0,0,3,1,0,0,1,0,1,0,red,0,0,0,0,1,0,0,0,1,0,red,red
2,Algeria,4,1,2388,20,8,2,2,0,3,1,1,0,0,1,0,0,green,0,0,0,0,1,1,0,0,0,0,green,white
3,American-Samoa,6,3,0,0,1,1,0,0,5,1,0,1,1,1,0,1,blue,0,0,0,0,0,0,1,1,1,0,blue,red
4,Andorra,3,1,0,0,6,0,3,0,3,1,0,1,1,0,0,0,gold,0,0,0,0,0,0,0,0,0,0,blue,red


In [10]:
data.loc[data.name == 'India']

Unnamed: 0,name,landmass,zone,area,population,language,region,bars,stipes,colors,red,green,blue,gold,white,black,orange,mainhue,circles,crosses,saltires,quarters,sunstars,crescent,triangle,icon,animate,text,topleft,botright
81,India,5,1,3268,684,6,4,0,3,4,0,1,1,0,1,0,1,orange,1,0,0,0,0,0,0,1,0,0,orange,green


In [49]:
color_map = dict(
    red=1,
    green=2,
    blue=3,
    gold=4,
    white=5,
    orange=6,
    black=7
)

X = data.drop(columns=['name'])
X.mainhue = X.mainhue.map(color_map)
X.topleft = X.topleft.map(color_map)
X.botright = X.botright.map(color_map)
X.fillna(0, inplace = True)
y = data.name

In [50]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(
    n_estimators=20,
    oob_score=True
)
clf.fit(X, y)

RandomForestClassifier(n_estimators=20, oob_score=True)

In [52]:
clf.score(X, y)

1.0

In [None]:
clf.predict(X[])

In [60]:
clf.predict(X.loc[141].to_numpy().reshape(1, -1))



array(['Qatar'], dtype=object)