In [1]:
import pandas as pd
import numpy as np

# Text data

In [2]:
chicago = pd.read_csv("chicago.csv").dropna(how="all")
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [3]:
chicago.info()

<class 'pandas.core.frame.DataFrame'>
Index: 32062 entries, 0 to 32061
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Name                    32062 non-null  object
 1   Position Title          32062 non-null  object
 2   Department              32062 non-null  object
 3   Employee Annual Salary  32062 non-null  object
dtypes: object(4)
memory usage: 1.2+ MB


In [4]:
chicago.nunique()

Name                      31776
Position Title             1093
Department                   35
Employee Annual Salary     1156
dtype: int64

In [5]:
chicago["Department"] = chicago["Department"].astype("category")

## basic string methods

- str.replace("")
- str.contain("")
- str.startwith("")
- str.lower()
- str.len()

In [6]:
chicago["Position Title"].str.lower()
chicago["Position Title"].str.upper()
chicago["Position Title"].str.title()
chicago["Position Title"].str.len()
chicago["Position Title"].str.title().str.len()
chicago["Position Title"].str.strip()
chicago["Position Title"].str.lstrip()
chicago["Position Title"].str.rstrip()

chicago["Department"].str.replace("MGMNT", "MANAGEMENT").str.title()

0        Water Management
1                  Police
2                  Police
3        General Services
4        Water Management
               ...       
32057    General Services
32058              Police
32059              Police
32060              Police
32061                Doit
Name: Department, Length: 32062, dtype: object

In [7]:
chicago["Position Title"].str.title().str.len()

0        16
1        14
2        14
3        24
4        17
         ..
32057    30
32058    14
32059    14
32060    14
32061    23
Name: Position Title, Length: 32062, dtype: int64

In [8]:
chicago["Position Title"].str.lower().str.contains("water")

0         True
1        False
2        False
3        False
4        False
         ...  
32057    False
32058    False
32059    False
32060    False
32061    False
Name: Position Title, Length: 32062, dtype: bool

In [9]:
chicago["Position Title"].str.lower().str.startswith("civil")

0        False
1        False
2        False
3        False
4         True
         ...  
32057    False
32058    False
32059    False
32060    False
32061    False
Name: Position Title, Length: 32062, dtype: bool

## str. methods on columns and indexes

In [10]:
chicago = pd.read_csv("chicago.csv", index_col="Name").dropna(how="all").sort_index()

chicago.index.str.strip().str.title()

Index(['Aaron,  Elvia J', 'Aaron,  Jeffery M', 'Aaron,  Karina',
       'Aaron,  Kimberlei R', 'Abad Jr,  Vicente M', 'Abarca,  Anabel',
       'Abarca,  Emmanuel', 'Abascal,  Reece E', 'Abbasi,  Christopher',
       'Abbatacola,  Robert J',
       ...
       'Zwit,  Jeffrey J', 'Zwolfer,  Matthew W', 'Zych,  Mateusz',
       'Zydek,  Bryan', 'Zygadlo,  John P', 'Zygadlo,  Michael J',
       'Zygowicz,  Peter J', 'Zymantas,  Mark E', 'Zyrkowski,  Carlo E',
       'Zyskowski,  Dariusz'],
      dtype='object', name='Name', length=32062)

In [11]:
chicago.columns.str.upper()

Index(['POSITION TITLE', 'DEPARTMENT', 'EMPLOYEE ANNUAL SALARY'], dtype='object')

## str.split method
str.get() reaches out to elements in a string or a list

In [12]:
chicago = pd.read_csv("chicago.csv").dropna(how="all").sort_index()

In [13]:
# The most common first word in our job positions/titles
chicago["Position Title"].str.split(" ").str.get(0).value_counts()

Position Title
POLICE             10856
FIREFIGHTER-EMT     1509
SERGEANT            1186
POOL                 918
FIREFIGHTER          810
                   ...  
DENTIST                1
ASSOC                  1
TELEPHONE              1
MAYOR                  1
PREPRESS               1
Name: count, Length: 320, dtype: int64

In [14]:
chicago["Position Title"].str.split(" ").str.get(1)

0            RATE
1         OFFICER
2         OFFICER
3        CONTRACT
4        ENGINEER
           ...   
32057          OF
32058     OFFICER
32059     OFFICER
32060     OFFICER
32061        DATA
Name: Position Title, Length: 32062, dtype: object

In [15]:
# Finding the most common first name among the employees

chicago["Name"].str.title().str.split(", ").str.get(1).str.strip().str.split(" ").str.get(0).value_counts()

Name
Michael     1153
John         899
James        676
Robert       622
Joseph       537
            ... 
Deena          1
Cherrise       1
Eartha         1
Ernika         1
Mac            1
Name: count, Length: 5091, dtype: int64

In [16]:
chicago[["Last Name", "First Name"]] = chicago["Name"].str.split(",", expand=True)
chicago

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Last Name,First Name
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,AARON,ELVIA J
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00,AARON,JEFFERY M
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00,AARON,KARINA
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,AARON,KIMBERLEI R
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,ABAD JR,VICENTE M
...,...,...,...,...,...,...
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,ZYGADLO,MICHAEL J
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,ZYGOWICZ,PETER J
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,ZYMANTAS,MARK E
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,ZYRKOWSKI,CARLO E


## split one column directly to two columns with splitted info

In [17]:
chicago[["Primary Title", "Secondary Title"]] = chicago["Position Title"].str.split(" ", expand=True, n=1)

In [18]:
chicago

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Last Name,First Name,Primary Title,Secondary Title
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,AARON,ELVIA J,WATER,RATE TAKER
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00,AARON,JEFFERY M,POLICE,OFFICER
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00,AARON,KARINA,POLICE,OFFICER
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,AARON,KIMBERLEI R,CHIEF,CONTRACT EXPEDITER
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,ABAD JR,VICENTE M,CIVIL,ENGINEER IV
...,...,...,...,...,...,...,...,...
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,ZYGADLO,MICHAEL J,FRM,OF MACHINISTS - AUTOMOTIVE
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,ZYGOWICZ,PETER J,POLICE,OFFICER
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,ZYMANTAS,MARK E,POLICE,OFFICER
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,ZYRKOWSKI,CARLO E,POLICE,OFFICER


# Date and time

## datetime library

dt container, attributes

In [19]:
import datetime as dt

someday = dt.date(2025, 12, 15)

someday.year
someday.month
someday.day

dt.datetime(2025, 12, 15)
dt.datetime(2025, 12, 15, 8)
dt.datetime(2025, 12, 15, 8, 13)
dt.datetime(2025, 12, 15, 8, 13, 59)

sometime = dt.datetime(2025, 12, 15, 8, 13, 59)
sometime.year
sometime.month
sometime.day
sometime.hour
sometime.minute
sometime.second

59

In [20]:
pd.Timestamp(2027, 3, 12)
pd.Timestamp(2027, 3, 12, 18, 23, 49)
pd.Timestamp(dt.date(2028, 10, 23))
pd.Timestamp(dt.datetime(2028, 10, 23, 14, 35))
pd.Timestamp("2025-01-01")
pd.Timestamp("2025/04/01")
pd.Timestamp("2021-03-08 08:35:15")

pd.Series([pd.Timestamp("2021-03-08 08:35:15")]).iloc[0]

Timestamp('2021-03-08 08:35:15')

## different ranges of dates

In [21]:
pd.date_range(start="2025-01-01", end="2025-01-07")
pd.date_range(start="2025-01-01", end="2025-01-07", freq="D")
pd.date_range(start="2025-01-01", end="2025-01-07", freq="2D")
pd.date_range(start="2025-01-01", end="2025-01-07", freq="B") # business days - Monday-Friday
pd.date_range(start="2025-01-01", end="2025-01-31", freq="W")
pd.date_range(start="2025-01-01", end="2025-01-31", freq="W-FRI")
pd.date_range(start="2025-01-01", end="2025-01-31", freq="W-THU")

pd.date_range(start="2025-01-01", end="2025-01-31", freq="h")
pd.date_range(start="2025-01-01", end="2025-01-31", freq="6h")

pd.date_range(start="2025-01-01", end="2025-12-31", freq="ME") # end of month
pd.date_range(start="2025-01-01", end="2025-12-31", freq="MS")
pd.date_range(start="2025-01-01", end="2050-12-31", freq="YE")

pd.date_range(start="2012-09-09", freq="D", periods=25)
pd.date_range(start="2012-09-09", freq="3D", periods=40)
pd.date_range(start="2012-09-09", freq="B", periods=180)

pd.date_range(end="2013-10-31", freq="D", periods=20)
pd.date_range(end="2016-12-31", freq="B", periods=75)
pd.date_range(end="1991-04-12", freq="W-FRI", periods=75)

DatetimeIndex(['1989-11-10', '1989-11-17', '1989-11-24', '1989-12-01',
               '1989-12-08', '1989-12-15', '1989-12-22', '1989-12-29',
               '1990-01-05', '1990-01-12', '1990-01-19', '1990-01-26',
               '1990-02-02', '1990-02-09', '1990-02-16', '1990-02-23',
               '1990-03-02', '1990-03-09', '1990-03-16', '1990-03-23',
               '1990-03-30', '1990-04-06', '1990-04-13', '1990-04-20',
               '1990-04-27', '1990-05-04', '1990-05-11', '1990-05-18',
               '1990-05-25', '1990-06-01', '1990-06-08', '1990-06-15',
               '1990-06-22', '1990-06-29', '1990-07-06', '1990-07-13',
               '1990-07-20', '1990-07-27', '1990-08-03', '1990-08-10',
               '1990-08-17', '1990-08-24', '1990-08-31', '1990-09-07',
               '1990-09-14', '1990-09-21', '1990-09-28', '1990-10-05',
               '1990-10-12', '1990-10-19', '1990-10-26', '1990-11-02',
               '1990-11-09', '1990-11-16', '1990-11-23', '1990-11-30',
      

## dt attributes

In [22]:
bunch_of_dates = pd.Series(pd.date_range(start="2000-01-01", end="2020-12-31", freq="24D 3H"))

bunch_of_dates.head()

  bunch_of_dates = pd.Series(pd.date_range(start="2000-01-01", end="2020-12-31", freq="24D 3H"))


0   2000-01-01 00:00:00
1   2000-01-25 03:00:00
2   2000-02-18 06:00:00
3   2000-03-13 09:00:00
4   2000-04-06 12:00:00
dtype: datetime64[ns]

In [23]:
bunch_of_dates.dt.day
bunch_of_dates.dt.month
bunch_of_dates.dt.year
bunch_of_dates.dt.hour
bunch_of_dates.dt.day_of_year
bunch_of_dates.dt.day_name()

bunch_of_dates.dt.is_month_end
bunch_of_dates.dt.is_month_start

bunch_of_dates[bunch_of_dates.dt.is_quarter_start]

0     2000-01-01 00:00:00
106   2007-01-01 06:00:00
212   2014-01-01 12:00:00
299   2019-10-01 09:00:00
dtype: datetime64[ns]

## important: date search in rows

In [24]:
stocks = pd.read_csv("ibm.csv", parse_dates=["Date"], index_col="Date").sort_index()
pd.set_option("display.precision", 3)
stocks.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02,5.046,5.046,4.987,4.987,593562.955
1962-01-03,4.987,5.033,4.987,5.033,445175.034
1962-01-04,5.033,5.033,4.981,4.981,399513.587
1962-01-05,4.974,4.974,4.875,4.882,559321.481
1962-01-08,4.882,4.882,4.751,4.79,833273.771


In [25]:
stocks.loc["2014-03-04"]

Open      1.289e+02
High      1.298e+02
Low       1.288e+02
Close     1.293e+02
Volume    6.825e+06
Name: 2014-03-04 00:00:00, dtype: float64

In [26]:
stocks.loc[pd.Timestamp(2014, 3, 4)]

Open      1.289e+02
High      1.298e+02
Low       1.288e+02
Close     1.293e+02
Volume    6.825e+06
Name: 2014-03-04 00:00:00, dtype: float64

In [27]:
stocks.loc[pd.Timestamp(2014, 3, 4):pd.Timestamp(2014, 12, 31)]

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-03-04,128.870,129.827,128.802,129.329,6.825e+06
2014-03-05,129.407,130.344,129.319,129.807,5.028e+06
2014-03-06,129.963,130.676,129.631,130.159,5.504e+06
2014-03-07,130.676,131.047,129.837,130.198,5.937e+06
2014-03-10,130.090,130.666,128.890,129.309,6.623e+06
...,...,...,...,...,...
2014-12-24,115.119,115.188,114.183,114.359,2.646e+06
2014-12-26,114.651,115.257,114.495,114.700,2.706e+06
2014-12-29,114.485,114.700,112.661,113.412,4.715e+06
2014-12-30,113.090,113.656,112.934,113.109,4.005e+06


In [28]:
stocks.truncate("2014-03-04", "2014-12-31")

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-03-04,128.870,129.827,128.802,129.329,6.825e+06
2014-03-05,129.407,130.344,129.319,129.807,5.028e+06
2014-03-06,129.963,130.676,129.631,130.159,5.504e+06
2014-03-07,130.676,131.047,129.837,130.198,5.937e+06
2014-03-10,130.090,130.666,128.890,129.309,6.623e+06
...,...,...,...,...,...
2014-12-24,115.119,115.188,114.183,114.359,2.646e+06
2014-12-26,114.651,115.257,114.495,114.700,2.706e+06
2014-12-29,114.485,114.700,112.661,113.412,4.715e+06
2014-12-30,113.090,113.656,112.934,113.109,4.005e+06


## The DateOffset Object: evenly add a time
- A **DateOffset** object adds time to a **Timestamp** to arrive at a new **Timestamp**.
- The **DateOffset** constructor accepts `days`, `weeks`, `months`, `years` parameters, and more.
- We can pass a **DateOffset** object to the `freq` parameter of the `pd.date_range` function.

In [29]:
stocks = pd.read_csv("ibm.csv", parse_dates=["Date"], index_col="Date").sort_index()
stocks.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962-01-02,5.046,5.046,4.987,4.987,593562.955
1962-01-03,4.987,5.033,4.987,5.033,445175.034
1962-01-04,5.033,5.033,4.981,4.981,399513.587
1962-01-05,4.974,4.974,4.875,4.882,559321.481
1962-01-08,4.882,4.882,4.751,4.79,833273.771


In [30]:
stocks.index + pd.DateOffset(days=5)
stocks.index - pd.DateOffset(days=5)
stocks.index + pd.DateOffset(months=3)
stocks.index - pd.DateOffset(years=1)
stocks.index + pd.DateOffset(hours=7)

stocks.index + pd.DateOffset(years=1, months=3, days=2, hours=14, minutes=23, seconds=12)

DatetimeIndex(['1963-04-04 14:23:12', '1963-04-05 14:23:12',
               '1963-04-06 14:23:12', '1963-04-07 14:23:12',
               '1963-04-10 14:23:12', '1963-04-11 14:23:12',
               '1963-04-12 14:23:12', '1963-04-13 14:23:12',
               '1963-04-14 14:23:12', '1963-04-17 14:23:12',
               ...
               '2024-12-30 14:23:12', '2024-12-31 14:23:12',
               '2025-01-04 14:23:12', '2025-01-05 14:23:12',
               '2025-01-06 14:23:12', '2025-01-07 14:23:12',
               '2025-01-08 14:23:12', '2025-01-11 14:23:12',
               '2025-01-12 14:23:12', '2025-01-13 14:23:12'],
              dtype='datetime64[ns]', name='Date', length=15546, freq=None)

In [31]:
stocks.index + pd.tseries.offsets.MonthEnd()
stocks.index - pd.tseries.offsets.MonthEnd()

stocks.index + pd.tseries.offsets.QuarterEnd()
stocks.index - pd.tseries.offsets.QuarterEnd()

stocks.index + pd.tseries.offsets.QuarterBegin(startingMonth=1)
stocks.index - pd.tseries.offsets.QuarterBegin(startingMonth=1)

stocks.index + pd.tseries.offsets.YearEnd()
stocks.index + pd.tseries.offsets.YearBegin()

DatetimeIndex(['1963-01-01', '1963-01-01', '1963-01-01', '1963-01-01',
               '1963-01-01', '1963-01-01', '1963-01-01', '1963-01-01',
               '1963-01-01', '1963-01-01',
               ...
               '2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01',
               '2024-01-01', '2024-01-01', '2024-01-01', '2024-01-01',
               '2024-01-01', '2024-01-01'],
              dtype='datetime64[ns]', name='Date', length=15546, freq=None)

## important: duration using datedelta

## Timedeltas
- A **Timedelta** is a pandas object that represents a duration (an amount of time).
- Subtracting two **Timestamp** objects will yield a **Timedelta** object (this applies to subtracting a **Series** from another **Series**).
- The **Timedelta** constructor accepts parameters for time as well as string descriptions.

In [32]:
pd.Timestamp("2023-03-31 12:30:48") - pd.Timestamp("2023-03-20 19:25:59")
pd.Timestamp("2023-03-20 19:25:59") - pd.Timestamp("2023-03-31 12:30:48")

pd.Timedelta(days=3, hours=2, minutes=5)
pd.Timedelta("5 minutes")
pd.Timedelta("3 days 2 hours 5 minutes")

Timedelta('3 days 02:05:00')

In [33]:
ecommerce = pd.read_csv("ecommerce.csv", index_col="ID", parse_dates=["order_date", "delivery_date"], date_format="%m/%d/%y")
ecommerce.head()

Unnamed: 0_level_0,order_date,delivery_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1998-05-24,1999-02-05
2,1992-04-22,1998-03-06
4,1991-02-10,1992-08-26
5,1992-07-21,1997-11-20
7,1993-09-02,1998-06-10


In [34]:
ecommerce["Delivery Time"] = ecommerce["delivery_date"] - ecommerce["order_date"]
ecommerce.head()

# you can also use timedelta to be added to a date

Unnamed: 0_level_0,order_date,delivery_date,Delivery Time
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1998-05-24,1999-02-05,257 days
2,1992-04-22,1998-03-06,2144 days
4,1991-02-10,1992-08-26,563 days
5,1992-07-21,1997-11-20,1948 days
7,1993-09-02,1998-06-10,1742 days


In [35]:
ecommerce["Delivery Time"].max()
ecommerce["Delivery Time"].min()
ecommerce["Delivery Time"].mean()

Timedelta('1217 days 22:53:53.532934128')

# Options and settings

## Changing Options with Attributes
- The `pd.options` module configures Pandas' settings.
- The `display` module deals with the presentation of data.
- The `min_rows` attribute sets the minimum number of displayed rows when `max_rows` is exceeded.
- The `max_columns` attribute sets the maximum number of displayed columns. There is no `min_columns` attribute.
- Overwrite an attribute's value with an equal sign and the new value.

In [36]:
df = pd.DataFrame(np.random.randint(0, 100, [61, 50]))
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,11,36,98,14,89,0,66,80,55,17,...,2,27,33,12,63,58,53,45,20,33
1,61,48,82,97,76,40,55,57,39,79,...,92,94,99,10,27,65,29,29,9,42
2,0,9,85,42,11,60,68,19,97,12,...,52,69,58,88,83,90,75,44,2,15
3,4,63,5,2,61,95,88,88,59,67,...,98,17,77,3,63,54,60,3,41,86
4,58,64,93,67,61,61,70,38,73,67,...,81,61,10,87,4,1,65,30,50,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,68,24,61,78,1,46,1,93,60,48,...,75,34,88,39,62,47,11,55,99,84
57,40,60,33,86,75,56,59,33,74,18,...,24,63,41,72,11,61,17,35,54,13
58,34,99,18,47,35,99,4,52,36,9,...,65,85,83,30,97,32,11,84,72,55
59,90,67,31,41,67,85,86,20,22,94,...,47,19,95,67,55,56,50,94,65,75


In [37]:
pd.options.display.min_rows

10

In [38]:
pd.options.display.max_rows

60

In [39]:
pd.options.display.min_rows = 20 # dislay more rows
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,11,36,98,14,89,0,66,80,55,17,...,2,27,33,12,63,58,53,45,20,33
1,61,48,82,97,76,40,55,57,39,79,...,92,94,99,10,27,65,29,29,9,42
2,0,9,85,42,11,60,68,19,97,12,...,52,69,58,88,83,90,75,44,2,15
3,4,63,5,2,61,95,88,88,59,67,...,98,17,77,3,63,54,60,3,41,86
4,58,64,93,67,61,61,70,38,73,67,...,81,61,10,87,4,1,65,30,50,24
5,38,1,57,98,34,63,26,15,18,59,...,73,79,28,15,4,56,95,98,92,96
6,37,40,76,74,99,9,26,52,92,42,...,57,50,35,2,28,17,26,24,39,55
7,43,52,73,91,85,49,31,49,31,41,...,40,17,26,49,22,95,22,84,16,71
8,70,45,57,45,36,94,94,74,17,72,...,67,83,27,73,32,32,96,24,92,95
9,68,66,13,55,68,94,30,36,77,83,...,65,73,69,45,66,82,36,76,46,36


In [40]:
## another way is to use set_option function call the display attributes

pd.set_option("display.max_columns", 16)
pd.set_option("display.max_rows", 10)

df

Unnamed: 0,0,1,2,3,4,5,6,7,...,42,43,44,45,46,47,48,49
0,11,36,98,14,89,0,66,80,...,33,12,63,58,53,45,20,33
1,61,48,82,97,76,40,55,57,...,99,10,27,65,29,29,9,42
2,0,9,85,42,11,60,68,19,...,58,88,83,90,75,44,2,15
3,4,63,5,2,61,95,88,88,...,77,3,63,54,60,3,41,86
4,58,64,93,67,61,61,70,38,...,10,87,4,1,65,30,50,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,68,24,61,78,1,46,1,93,...,88,39,62,47,11,55,99,84
57,40,60,33,86,75,56,59,33,...,41,72,11,61,17,35,54,13
58,34,99,18,47,35,99,4,52,...,83,30,97,32,11,84,72,55
59,90,67,31,41,67,85,86,20,...,95,67,55,56,50,94,65,75


In [41]:
df = pd.DataFrame(np.random.randn(5, 5))
pd.set_option("display.precision", 33) #only works for floats; change precisions
df

Unnamed: 0,0,1,2,3,4
0,-0.2238646213817646,1.8288100016543585,1.259343687917482,-0.3696638546172673,0.6845721561811675
1,-0.620296970860938,-0.5642951957220542,1.1128181084227302,1.7697112579823056,-0.916778328438023
2,-0.5800879245470016,0.0153552343468241,0.4608546956513911,0.0694584290950776,-2.135545026504201
3,-0.9069366802141056,1.0184078140682895,0.88855563602213,-0.1868294622051021,-1.168743328647584
4,-1.4529145353709867,-0.380890724303196,0.5035207778913328,-0.7930115455442119,0.0506404819637321
