#### Series
A Series is a one-dimensional array-like object containing an array of data (of any
NumPy data type) and an associated array of data labels, called its index. The simplest
Series is formed from only an array of data:


In [16]:
from pandas import *
import pandas as pd

a=Series([1,2,3,4,5])
print(a)

print("WITH INDEXING\n")
a=Series([1,2,3,4,5],index=['a','b','c','d','e'])
print(a,"\n")

#Dictionary to Series
dict1={"Country":"Captial","INDIA":"DELHI","AUTRALIA":"SYDENY","BANGLADESH":"DHAKA","NEPAL":"KAHTMANDU"}
series=Series(dict1)
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64
WITH INDEXING

a    1
b    2
c    3
d    4
e    5
dtype: int64 

Country         Captial
INDIA             DELHI
AUTRALIA         SYDENY
BANGLADESH        DHAKA
NEPAL         KAHTMANDU
dtype: object


#### DataFrame
A DataFrame represents a tabular, spreadsheet-like data structure containing an ordered collection of columns, each of which can be a different value type (numeric,
string, boolean, etc.). The DataFrame has both a row and column index; it can be
thought of as a dict of Series (one for all sharing the same index

In [27]:
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
 'year': [2000, 2001, 2002, 2001, 2002],
 'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}
frame = DataFrame(data)
print("Automatically sorted columns \n")
print(frame)
print("\n")
print("Maintaining the sequence as of dictionary\n")
frame1=DataFrame(data,columns=['state','year','pop'],index=['1','2','3','4','5'])
print(frame1)

Automatically sorted columns 

    state  year  pop
0    Ohio  2000  1.5
1    Ohio  2001  1.7
2    Ohio  2002  3.6
3  Nevada  2001  2.4
4  Nevada  2002  2.9


Maintaining the sequence as of dictionary

    state  year  pop
1    Ohio  2000  1.5
2    Ohio  2001  1.7
3    Ohio  2002  3.6
4  Nevada  2001  2.4
5  Nevada  2002  2.9


In [30]:
frame['state'] 

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
Name: state, dtype: object

In [32]:
frame.year

0    2000
1    2001
2    2002
3    2001
4    2002
Name: year, dtype: int64

Another common form of data is a nested dict of dicts format.
If passed to DataFrame, it will interpret the outer dict keys as the columns and the inner
keys as the row indices

In [36]:
pop = {'Nevada': {2000:3.4,2001: 2.4, 2002: 2.9},
      'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame=DataFrame(pop)
print(frame)


      Nevada  Ohio
2000     3.4   1.5
2001     2.4   1.7
2002     2.9   3.6


### Index Objects
pandas’s Index objects are responsible for holding the axis labels and other metadata
(like the axis name or names). Any array or other sequence of labels used when constructing a Series or DataFrame is internally converted to an Index

In [38]:
obj = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
print(obj)

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64


In [43]:
#REINDEXING
obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0) 
print(obj2)

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64


In [53]:
obj=Series(np.arange(4.),index=['a','b','c','d'])
print(obj)



a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64


In [74]:
data = DataFrame(np.arange(16).reshape((4, 4)),
        index=['Ohio', 'Colorado', 'Utah', 'New York'],
        columns=['one', 'two', 'three', 'four'])
print(data)
print("\n")
data=data.T
print("TRANSPOSED \n")
print(data)
data['Ohio']


          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


TRANSPOSED 

       Ohio  Colorado  Utah  New York
one       0         4     8        12
two       1         5     9        13
three     2         6    10        14
four      3         7    11        15


one      0
two      1
three    2
four     3
Name: Ohio, dtype: int32

## Data Loading, Storage, and File Formats

In [32]:
df=pd.read_csv('pokemon_data.csv')
print(df)

       #                       Name    Type 1  Type 2   HP  Attack  Defense  \
0      1                  Bulbasaur     Grass  Poison   45      49       49   
1      2                    Ivysaur     Grass  Poison   60      62       63   
2      3                   Venusaur     Grass  Poison   80      82       83   
3      3      VenusaurMega Venusaur     Grass  Poison   80     100      123   
4      4                 Charmander      Fire     NaN   39      52       43   
5      5                 Charmeleon      Fire     NaN   58      64       58   
6      6                  Charizard      Fire  Flying   78      84       78   
7      6  CharizardMega Charizard X      Fire  Dragon   78     130      111   
8      6  CharizardMega Charizard Y      Fire  Flying   78     104       78   
9      7                   Squirtle     Water     NaN   44      48       65   
10     8                  Wartortle     Water     NaN   59      63       80   
11     9                  Blastoise     Water     Na

In [35]:
print(df.columns)


Index(['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
       'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')


In [37]:
print(df['Name'])

0                      Bulbasaur
1                        Ivysaur
2                       Venusaur
3          VenusaurMega Venusaur
4                     Charmander
5                     Charmeleon
6                      Charizard
7      CharizardMega Charizard X
8      CharizardMega Charizard Y
9                       Squirtle
10                     Wartortle
11                     Blastoise
12       BlastoiseMega Blastoise
13                      Caterpie
14                       Metapod
15                    Butterfree
16                        Weedle
17                        Kakuna
18                      Beedrill
19         BeedrillMega Beedrill
20                        Pidgey
21                     Pidgeotto
22                       Pidgeot
23           PidgeotMega Pidgeot
24                       Rattata
25                      Raticate
26                       Spearow
27                        Fearow
28                         Ekans
29                         Arbok
          

In [45]:
print(df['Name'][1:5])

1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name, dtype: object


In [46]:
print(df.head(10))#prints first 10 rows

   #                       Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  \
0  1                  Bulbasaur  Grass  Poison  45      49       49       65   
1  2                    Ivysaur  Grass  Poison  60      62       63       80   
2  3                   Venusaur  Grass  Poison  80      82       83      100   
3  3      VenusaurMega Venusaur  Grass  Poison  80     100      123      122   
4  4                 Charmander   Fire     NaN  39      52       43       60   
5  5                 Charmeleon   Fire     NaN  58      64       58       80   
6  6                  Charizard   Fire  Flying  78      84       78      109   
7  6  CharizardMega Charizard X   Fire  Dragon  78     130      111      130   
8  6  CharizardMega Charizard Y   Fire  Flying  78     104       78      159   
9  7                   Squirtle  Water     NaN  44      48       65       50   

   Sp. Def  Speed  Generation  Legendary  
0       65     45           1      False  
1       80     60           1    

In [47]:
print(df.iloc[1])#prints first row

#                   2
Name          Ivysaur
Type 1          Grass
Type 2         Poison
HP                 60
Attack             62
Defense            63
Sp. Atk            80
Sp. Def            80
Speed              60
Generation          1
Legendary       False
Name: 1, dtype: object


In [49]:
print(df.iloc[1:5])#print first five rows

   #                   Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  \
1  2                Ivysaur  Grass  Poison  60      62       63       80   
2  3               Venusaur  Grass  Poison  80      82       83      100   
3  3  VenusaurMega Venusaur  Grass  Poison  80     100      123      122   
4  4             Charmander   Fire     NaN  39      52       43       60   

   Sp. Def  Speed  Generation  Legendary  
1       80     60           1      False  
2      100     80           1      False  
3      120     80           1      False  
4       50     65           1      False  


In [53]:
print(df.iloc[2,2])#prints element from e=second row and second column

Grass


In [58]:
for index,rows in df.iterrows():
    print(index,rows['Name'])

0 Bulbasaur
1 Ivysaur
2 Venusaur
3 VenusaurMega Venusaur
4 Charmander
5 Charmeleon
6 Charizard
7 CharizardMega Charizard X
8 CharizardMega Charizard Y
9 Squirtle
10 Wartortle
11 Blastoise
12 BlastoiseMega Blastoise
13 Caterpie
14 Metapod
15 Butterfree
16 Weedle
17 Kakuna
18 Beedrill
19 BeedrillMega Beedrill
20 Pidgey
21 Pidgeotto
22 Pidgeot
23 PidgeotMega Pidgeot
24 Rattata
25 Raticate
26 Spearow
27 Fearow
28 Ekans
29 Arbok
30 Pikachu
31 Raichu
32 Sandshrew
33 Sandslash
34 Nidoran (Female)
35 Nidorina
36 Nidoqueen
37 Nidoran (Male)
38 Nidorino
39 Nidoking
40 Clefairy
41 Clefable
42 Vulpix
43 Ninetales
44 Jigglypuff
45 Wigglytuff
46 Zubat
47 Golbat
48 Oddish
49 Gloom
50 Vileplume
51 Paras
52 Parasect
53 Venonat
54 Venomoth
55 Diglett
56 Dugtrio
57 Meowth
58 Persian
59 Psyduck
60 Golduck
61 Mankey
62 Primeape
63 Growlithe
64 Arcanine
65 Poliwag
66 Poliwhirl
67 Poliwrath
68 Abra
69 Kadabra
70 Alakazam
71 AlakazamMega Alakazam
72 Machop
73 Machoke
74 Machamp
75 Bellsprout
76 Weepinbell
77 

791 Noivern
792 Xerneas
793 Yveltal
794 Zygarde50% Forme
795 Diancie
796 DiancieMega Diancie
797 HoopaHoopa Confined
798 HoopaHoopa Unbound
799 Volcanion


In [62]:
print(df.loc[df['Name']=='Pikachu'])#finds and prints a specific element

     #     Name    Type 1 Type 2  HP  Attack  Defense  Sp. Atk  Sp. Def  \
30  25  Pikachu  Electric    NaN  35      55       40       50       50   

    Speed  Generation  Legendary  
30     90           1      False  


In [65]:
df.describe()#prints all statistics of data set

Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation
count,800.0,800.0,800.0,800.0,800.0,800.0,800.0,800.0
mean,362.81375,69.25875,79.00125,73.8425,72.82,71.9025,68.2775,3.32375
std,208.343798,25.534669,32.457366,31.183501,32.722294,27.828916,29.060474,1.66129
min,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,184.75,50.0,55.0,50.0,49.75,50.0,45.0,2.0
50%,364.5,65.0,75.0,70.0,65.0,70.0,65.0,3.0
75%,539.25,80.0,100.0,90.0,95.0,90.0,90.0,5.0
max,721.0,255.0,190.0,230.0,194.0,230.0,180.0,6.0


In [71]:
print(df.sort_values(['HP','Attack','Defense'],ascending=[0,0,0]))#sort values aplhabetically 

       #                   Name    Type 1    Type 2   HP  Attack  Defense  \
261  242                Blissey    Normal       NaN  255      10       10   
121  113                Chansey    Normal       NaN  250       5        5   
217  202              Wobbuffet   Psychic       NaN  190      33       58   
351  321                Wailord     Water       NaN  170      90       45   
655  594              Alomomola     Water       NaN  165      75       80   
155  143                Snorlax    Normal       NaN  160     110       65   
313  289                Slaking    Normal       NaN  150     160      100   
545  487   GiratinaOrigin Forme     Ghost    Dragon  150     120      100   
544  487  GiratinaAltered Forme     Ghost    Dragon  150     100      120   
473  426               Drifblim     Ghost    Flying  150      80       44   
321  297               Hariyama  Fighting       NaN  144     120       60   
45    40             Wigglytuff    Normal     Fairy  140      70       45   

### ADDIND DATA TO DATA SET

In [80]:
df['Total']=df['HP']+df['Attack']+df['Defense']+df['Sp. Atk']+df['Sp. Def']+df['Speed']
df.sort_values(['Total'],ascending=False)

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Total
426,384,RayquazaMega Rayquaza,Dragon,Flying,105,180,100,180,100,115,3,True,780
164,150,MewtwoMega Mewtwo Y,Psychic,,106,150,70,194,120,140,1,True,780
163,150,MewtwoMega Mewtwo X,Psychic,Fighting,106,190,100,154,100,130,1,True,780
422,382,KyogrePrimal Kyogre,Water,,100,150,90,180,160,90,3,True,770
424,383,GroudonPrimal Groudon,Ground,Fire,100,180,160,150,90,90,3,True,770
552,493,Arceus,Normal,,120,120,120,120,120,120,4,True,720
712,646,KyuremWhite Kyurem,Dragon,Ice,125,120,90,170,100,95,5,True,700
711,646,KyuremBlack Kyurem,Dragon,Ice,125,170,100,120,90,95,5,True,700
409,373,SalamenceMega Salamence,Dragon,Flying,95,145,130,120,90,120,3,False,700
413,376,MetagrossMega Metagross,Steel,Psychic,80,145,150,105,110,110,3,False,700


In [84]:
df=df.drop(columns=['Generation'])#to drop a column

KeyError: "['Generation'] not found in axis"

### WRITING TO A FILE

In [87]:
df.to_csv('Pokemon.txt',sep='\t')
#df.to_excel('Pokemon.xlxs')

### FILTERING DATA

In [116]:
new_df=df.loc[(df['Type 1']=='Fire') & (df['HP']>100)]
print(new_df)
new_df.to_csv('Sorted Fire Pokemon.txt',sep='\t')

       #                     Name Type 1    Type 2   HP  Attack  Defense  \
263  244                    Entei   Fire       NaN  115     115       85   
270  250                    Ho-oh   Fire    Flying  106     130       90   
559  500                   Emboar   Fire  Fighting  110     123       65   
615  555  DarmanitanStandard Mode   Fire       NaN  105     140       55   
616  555       DarmanitanZen Mode   Fire   Psychic  105      30      105   

     Sp. Atk  Sp. Def  Speed  Legendary  Total  
263       90       75    100       True    580  
270      110      154     90       True    680  
559      100       65     65      False    528  
615       30       55     95      False    480  
616      140      105     55      False    540  


In [117]:
#TO REMOVE DATA WITH SPECIFIC STRINGS IN IT
df.loc[~df['Name'].str.contains('Mega')]

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Legendary,Total
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,False,318
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,False,405
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,False,525
4,4,Charmander,Fire,,39,52,43,60,50,65,False,309
5,5,Charmeleon,Fire,,58,64,58,80,65,80,False,405
6,6,Charizard,Fire,Flying,78,84,78,109,85,100,False,534
9,7,Squirtle,Water,,44,48,65,50,64,43,False,314
10,8,Wartortle,Water,,59,63,80,65,80,58,False,405
11,9,Blastoise,Water,,79,83,100,85,105,78,False,530
13,10,Caterpie,Bug,,45,30,35,20,20,45,False,195
