# Data Manipulation with pandas

### Sorting rows
Finding interesting bits of data in a DataFrame is often easier if you change the order of the rows. You can sort the rows by passing a column name to `.sort_values()`.

In cases where rows have the same value (this is common if you sort on a categorical variable), you may wish to break the ties by sorting on another column. You can sort on multiple columns in this way by passing a list of column names.

|Sort on …        |Syntax                                   |
|-----------------|-----------------------------------------|
|one column       |`df.sort_values("breed")`                |
|multiple columns |`df.sort_values(["breed", "weight_kg"])` |

By combining `.sort_values()` with `.head()`, you can answer questions of the form "What are the top cases where…?"

In [1]:
import pandas as pd

In [4]:
# Define the American homelessness dataset inline as a list of rows.
# Each row holds five values, in the order later used for the column names:
#   [region, state, individuals, family_members, state_pop]
#
# NOTE(review): the California row appears twice in a row (identical values).
#   This looks like an accidental duplicate -- confirm before removing it,
#   because the row indices shown in the recorded outputs depend on it.
# NOTE(review): multi-word names are spelled inconsistently. The first rows
#   use plain spaces ('East South Central', 'West South Central') while later
#   rows contain commas ('East, South, Central', 'New, York'). The two
#   spellings are different strings, so they sort as different regions.
#   Presumably a conversion artifact -- verify against the source data.
homelessness_int = [['East South Central', 'Alabama', 2570.0, 864.0, 4887681],
 ['Pacific', 'Alaska', 1434.0, 582.0, 735139],
 ['Mountain', 'Arizona', 7259.0, 2606.0, 7158024],
 ['West South Central', 'Arkansas', 2280.0, 432.0, 3009733],
 ['Pacific', 'California', 109008.0, 20964.0, 39461588],
 ['Pacific', 'California', 109008.0, 20964.0, 39461588],  # NOTE(review): same as the row above
 ['Mountain', 'Colorado', 7607.0, 3250.0, 5691287],
 ['New, England', 'Connecticut', 2280.0, 1696.0, 3571520],
 ['South, Atlantic', 'Delaware', 708.0, 374.0, 965479],
 ['South, Atlantic', 'District, of, Columbia', 3770.0, 3134.0, 701547],
 ['South, Atlantic', 'Florida', 21443.0, 9587.0, 21244317],
 ['South, Atlantic', 'Georgia', 6943.0, 2556.0, 10511131],
 ['Pacific', 'Hawaii', 4131.0, 2399.0, 1420593],
 ['Mountain', 'Idaho', 1297.0, 715.0, 1750536],
 ['East, North, Central', 'Illinois', 6752.0, 3891.0, 12723071],
 ['East, North, Central', 'Indiana', 3776.0, 1482.0, 6695497],
 ['West, North, Central', 'Iowa', 1711.0, 1038.0, 3148618],
 ['West, North, Central', 'Kansas', 1443.0, 773.0, 2911359],
 ['East, South, Central', 'Kentucky', 2735.0, 953.0, 4461153],
 ['West, South, Central', 'Louisiana', 2540.0, 519.0, 4659690],
 ['New, England', 'Maine', 1450.0, 1066.0, 1339057],
 ['South, Atlantic', 'Maryland', 4914.0, 2230.0, 6035802],
 ['New, England', 'Massachusetts', 6811.0, 13257.0, 6882635],
 ['East, North, Central', 'Michigan', 5209.0, 3142.0, 9984072],
 ['West, North, Central', 'Minnesota', 3993.0, 3250.0, 5606249],
 ['East, South, Central', 'Mississippi', 1024.0, 328.0, 2981020],
 ['West, North, Central', 'Missouri', 3776.0, 2107.0, 6121623],
 ['Mountain', 'Montana', 983.0, 422.0, 1060665],
 ['West, North, Central', 'Nebraska', 1745.0, 676.0, 1925614],
 ['Mountain', 'Nevada', 7058.0, 486.0, 3027341],
 ['New, England', 'New, Hampshire', 835.0, 615.0, 1353465],
 ['Mid-Atlantic', 'New, Jersey', 6048.0, 3350.0, 8886025],
 ['Mountain', 'New, Mexico', 1949.0, 602.0, 2092741],
 ['Mid-Atlantic', 'New, York', 39827.0, 52070.0, 19530351],
 ['South, Atlantic', 'North, Carolina', 6451.0, 2817.0, 10381615],
 ['West, North, Central', 'North, Dakota', 467.0, 75.0, 758080],
 ['East, North, Central', 'Ohio', 6929.0, 3320.0, 11676341],
 ['West, South, Central', 'Oklahoma', 2823.0, 1048.0, 3940235],
 ['Pacific', 'Oregon', 11139.0, 3337.0, 4181886],
 ['Mid-Atlantic', 'Pennsylvania', 8163.0, 5349.0, 12800922],
 ['New, England', 'Rhode, Island', 747.0, 354.0, 1058287],
 ['South, Atlantic', 'South, Carolina', 3082.0, 851.0, 5084156],
 ['West, North, Central', 'South, Dakota', 836.0, 323.0, 878698],
 ['East, South, Central', 'Tennessee', 6139.0, 1744.0, 6771631],
 ['West, South, Central', 'Texas', 19199.0, 6111.0, 28628666],
 ['Mountain', 'Utah', 1904.0, 972.0, 3153550],
 ['New, England', 'Vermont', 780.0, 511.0, 624358],
 ['South, Atlantic', 'Virginia', 3928.0, 2047.0, 8501286],
 ['Pacific', 'Washington', 16424.0, 5880.0, 7523869],
 ['South, Atlantic', 'West, Virginia', 1021.0, 222.0, 1804291],
 ['East, North, Central', 'Wisconsin', 2740.0, 2167.0, 5807406],
 ['Mountain', 'Wyoming', 434.0, 205.0, 577601]]


In [7]:
# Wrap the nested list in a DataFrame. No column names are supplied here,
# so pandas labels the columns with default integers (0, 1, 2, ...).
homelessness = pd.DataFrame(data=homelessness_int)

# Preview the first rows, then show the default column labels
print(homelessness.head(n=5), homelessness.columns, sep="\n")

                    0           1         2        3         4
0  East South Central     Alabama    2570.0    864.0   4887681
1             Pacific      Alaska    1434.0    582.0    735139
2            Mountain     Arizona    7259.0   2606.0   7158024
3  West South Central    Arkansas    2280.0    432.0   3009733
4             Pacific  California  109008.0  20964.0  39461588
RangeIndex(start=0, stop=5, step=1)


In [9]:
# Replace the default integer labels with descriptive names, listed in the
# same order as the values inside each row
homelessness.columns = [
    'region',
    'state',
    'individuals',
    'family_members',
    'state_pop',
]

# Display the first rows to confirm the new labels
homelessness.head(n=5)

               region       state  individuals  family_members  state_pop
0  East South Central     Alabama       2570.0           864.0    4887681
1             Pacific      Alaska       1434.0           582.0     735139
2            Mountain     Arizona       7259.0          2606.0    7158024
3  West South Central    Arkansas       2280.0           432.0    3009733
4             Pacific  California     109008.0         20964.0   39461588


In [10]:
# Order the rows by the individuals column, smallest values first
# (ascending order is what sort_values does by default)
homelessness_ind = homelessness.sort_values(by="individuals", ascending=True)

# Show the first five rows, i.e. the smallest individuals values
print(homelessness_ind.head(n=5))

                  region          state  individuals  family_members  \
51              Mountain        Wyoming        434.0           205.0   
35  West, North, Central  North, Dakota        467.0            75.0   
8        South, Atlantic       Delaware        708.0           374.0   
40          New, England  Rhode, Island        747.0           354.0   
46          New, England        Vermont        780.0           511.0   

    state_pop  
51     577601  
35     758080  
8      965479  
40    1058287  
46     624358  


In [11]:
# Order the rows by the family_members column, largest values first
homelessness_fam = homelessness.sort_values(
    by="family_members",
    ascending=False,
)

# Show the first five rows, i.e. the largest family_members values
print(homelessness_fam.head(n=5))

             region          state  individuals  family_members  state_pop
33     Mid-Atlantic      New, York      39827.0         52070.0   19530351
4           Pacific     California     109008.0         20964.0   39461588
5           Pacific     California     109008.0         20964.0   39461588
22     New, England  Massachusetts       6811.0         13257.0    6882635
10  South, Atlantic        Florida      21443.0          9587.0   21244317


In [12]:
# Sort by region in ascending order; rows that share a region are then
# ordered by family_members from largest to smallest. Each ascending flag
# applies to the column in the same position of the `by` list.
# NOTE(review): region names are spelled inconsistently in the data
#   ('East South Central' vs 'East, South, Central'), so those spellings
#   sort as separate regions in the result.
homelessness_reg_fam = homelessness.sort_values(
    by=["region", "family_members"],
    ascending=[True, False],
)

# Show the first five rows of the sorted result
print(homelessness_reg_fam.head(n=5))

                  region      state  individuals  family_members  state_pop
0     East South Central    Alabama       2570.0           864.0    4887681
14  East, North, Central   Illinois       6752.0          3891.0   12723071
36  East, North, Central       Ohio       6929.0          3320.0   11676341
23  East, North, Central   Michigan       5209.0          3142.0    9984072
50  East, North, Central  Wisconsin       2740.0          2167.0    5807406
