In [1]:
import pandas as pd

# Pandas Series

In [2]:
# Build a Series from a plain list; with no index given, pandas labels the
# rows 0..n-1. Mixing an int with floats yields a float64 Series.
mountain_height = pd.Series(
    data=[2061, 2035.8, 2028.5, 2022.5, 2016.4],
)
print(mountain_height)

0    2061.0
1    2035.8
2    2028.5
3    2022.5
4    2016.4
dtype: float64


In [3]:
# Same heights, now labelled by peak name. A dict supplies the labels (keys)
# and values together; `name` labels the Series itself and `dtype` forces
# float storage even for the integer entry.
mountain_height = pd.Series(
    {
        'Goverla': 2061,
        'Brebeneskyl': 2035.8,
        'Pip_Ivan': 2028.5,
        'Petros': 2022.5,
        'Gutin_Tomnatik': 2016.4,
    },
    dtype=float,
    name='Height, m',
)
print(mountain_height)

Goverla           2061.0
Brebeneskyl       2035.8
Pip_Ivan          2028.5
Petros            2022.5
Gutin_Tomnatik    2016.4
Name: Height, m, dtype: float64


In [4]:
# Positional access goes through .iloc: on a Series with a string index,
# using a bare integer key in [] as a position is deprecated in pandas 2.x.
print(mountain_height.iloc[0])
# Label-based access by peak name.
print(mountain_height['Goverla'])


2061.0
2061.0


In [5]:
# Select several peaks at once; the result follows the order of the
# requested labels, not the order of the original Series.
print(mountain_height.loc[['Pip_Ivan', 'Goverla', 'Gutin_Tomnatik']])

Pip_Ivan          2028.5
Goverla           2061.0
Gutin_Tomnatik    2016.4
Name: Height, m, dtype: float64


In [6]:
# Positional slice: the stop position is excluded (positions 1 and 2 only).
print(mountain_height.iloc[1:3])
print('=' * 50)
# Label slice: both endpoints are included, so 'Petros' is part of the result.
print(mountain_height.loc['Brebeneskyl':'Petros'])

Brebeneskyl    2035.8
Pip_Ivan       2028.5
Name: Height, m, dtype: float64
Brebeneskyl    2035.8
Pip_Ivan       2028.5
Petros         2022.5
Name: Height, m, dtype: float64


In [7]:
# Comparing a Series with a scalar yields a boolean Series on the same index.
is_above_2030 = mountain_height > 2030
print(is_above_2030)
print('=' * 50)
# Indexing with that mask keeps only the rows where it is True.
print(mountain_height[is_above_2030])

Goverla            True
Brebeneskyl        True
Pip_Ivan          False
Petros            False
Gutin_Tomnatik    False
Name: Height, m, dtype: bool
Goverla        2061.0
Brebeneskyl    2035.8
Name: Height, m, dtype: float64


In [8]:
# Membership on a Series is tested against its index labels (not its values),
# so spell the index out explicitly.
print('Goverla' in mountain_height.index)

True


In [9]:
# sort_index returns a new Series ordered alphabetically by label;
# mountain_height itself keeps its original order.
sorted_indexes = mountain_height.sort_index(ascending=True)
print(sorted_indexes)

Brebeneskyl       2035.8
Goverla           2061.0
Gutin_Tomnatik    2016.4
Petros            2022.5
Pip_Ivan          2028.5
Name: Height, m, dtype: float64


In [10]:
# Order the peaks from highest to lowest. sort_values returns a new Series,
# so rebind the name instead of mutating with inplace=True; the cell stays
# chainable and avoids hidden in-place state changes between cells.
mountain_height = mountain_height.sort_values(ascending=False)
print(mountain_height)

Goverla           2061.0
Brebeneskyl       2035.8
Pip_Ivan          2028.5
Petros            2022.5
Gutin_Tomnatik    2016.4
Name: Height, m, dtype: float64


In [11]:
# Heights known so far, keyed by peak name.
# NOTE(review): "Brebenskyl" is spelled "Brebeneskyl" in the earlier Series —
# confirm which spelling is intended.
d = {"Goverla": 2061, "Brebenskyl": 2035.8, "Pip_Ivan": 2028.5}
# Labels listed in `index` but absent from the dict come out as NaN.
mountain_height = pd.Series(
    d,
    index=[*d, 'Petros', 'Gutin_Tomnatik'],
    name='Height, m',
    dtype=float,
)
print(mountain_height)
print('=' * 50)
# Replace the missing heights with 0. fillna returns a new Series, so rebind
# the name rather than mutating with inplace=True.
mountain_height = mountain_height.fillna(0)
print(mountain_height)

Goverla           2061.0
Brebenskyl        2035.8
Pip_Ivan          2028.5
Petros               NaN
Gutin_Tomnatik       NaN
Name: Height, m, dtype: float64
Goverla           2061.0
Brebenskyl        2035.8
Pip_Ivan          2028.5
Petros               0.0
Gutin_Tomnatik       0.0
Name: Height, m, dtype: float64


# Pandas DataFrame

In [12]:
# Address book built one record (row) per contact; the dict keys become the
# columns and the explicit index numbers the rows from 1 instead of 0.
contacts = pd.DataFrame(
    [
        {
            'name': 'Allen Raymond',
            'email': "nulla.ante@vestibul.co.uk",
            "phone": "(992) 914-3792",
            "favorite": False,
        },
        {
            'name': 'Chaim Lewis',
            'email': "dui.in@egetlacus.ca",
            "phone": "(294) 840-6685",
            "favorite": False,
        },
        {
            'name': 'Kennedy Lane',
            'email': "mattis.Cras@nonenimMauris.net",
            "phone": "(542) 451-7038",
            "favorite": True,
        },
        {
            'name': 'Wylie Pope',
            'email': "est@utquamvel.net",
            "phone": "(692) 802-2949",
            "favorite": False,
        },
        {
            'name': 'Cyrus Jackson',
            'email': "nibh@semsempererat.com",
            "phone": "(501) 472-5218",
            "favorite": True,
        },
    ],
    index=[1, 2, 3, 4, 5],
)
print(contacts)

            name                          email           phone  favorite
1  Allen Raymond      nulla.ante@vestibul.co.uk  (992) 914-3792     False
2    Chaim Lewis            dui.in@egetlacus.ca  (294) 840-6685     False
3   Kennedy Lane  mattis.Cras@nonenimMauris.net  (542) 451-7038      True
4     Wylie Pope              est@utquamvel.net  (692) 802-2949     False
5  Cyrus Jackson         nibh@semsempererat.com  (501) 472-5218      True


In [13]:
# Pull out a single column; the result is a Series named after the column.
print(contacts.loc[:, 'name'])

1    Allen Raymond
2      Chaim Lewis
3     Kennedy Lane
4       Wylie Pope
5    Cyrus Jackson
Name: name, dtype: object


In [14]:
# .loc selects by index label: the row whose label is 1 (the first contact).
print(contacts.loc[1, :])

name                    Allen Raymond
email       nulla.ante@vestibul.co.uk
phone                  (992) 914-3792
favorite                        False
Name: 1, dtype: object


In [15]:
# .iloc selects by zero-based position: position 1 is the second contact,
# whose index label is 2.
print(contacts.iloc[1, :])

name                Chaim Lewis
email       dui.in@egetlacus.ca
phone            (294) 840-6685
favorite                  False
Name: 2, dtype: object


In [16]:
# First two rows by position; .iloc makes it explicit that 0:2 are positions
# and not the index labels (which start at 1 here).
print(contacts.iloc[:2])

            name                      email           phone  favorite
1  Allen Raymond  nulla.ante@vestibul.co.uk  (992) 914-3792     False
2    Chaim Lewis        dui.in@egetlacus.ca  (294) 840-6685     False


In [17]:
# The boolean 'favorite' column doubles as a row mask: keep only favorites.
print(contacts.loc[contacts['favorite']])

            name                          email           phone  favorite
3   Kennedy Lane  mattis.Cras@nonenimMauris.net  (542) 451-7038      True
5  Cyrus Jackson         nibh@semsempererat.com  (501) 472-5218      True


In [18]:
# Load the contacts from CSV (path is relative to the working directory);
# with no index column specified the rows get the default 0..n-1 index.
users = pd.read_csv(filepath_or_buffer='users.csv')
print(users)

            name                          email           phone  favorite
0  Allen Raymond      nulla.ante@vestibul.co.uk  (992) 914-3792     False
1    Chaim Lewis            dui.in@egetlacus.ca  (294) 840-6685     False
2   Kennedy Lane  mattis.Cras@nonenimMauris.net  (542) 451-7038      True
3     Wylie Pope              est@utquamvel.net  (692) 802-2949     False
4  Cyrus Jackson         nibh@semsempererat.com  (501) 472-5218      True


In [19]:
# Write the frame back to CSV; index=False leaves the row index out of the file.
users.to_csv(path_or_buf='contacts.csv', index=False)

In [20]:
# Load a workbook (the first sheet by default) into a DataFrame.
# NOTE(review): reading .xlsx presumably needs an Excel engine such as
# openpyxl installed — confirm for the target environment.
persons = pd.read_excel(io='persons.xlsx')
print(persons)

      name country
0  Michael  Canada
1     John     USA
2     Liza  Canada


In [21]:
# Export to a worksheet named 'Contacts'. index=False omits the row-index
# column, matching the CSV export of the same frame; without it the file
# gains an extra unnamed leading column.
users.to_excel('contacts.xlsx', sheet_name='Contacts', index=False)

In [22]:
# orient='split': the JSON holds separate "index", "columns" and "data"
# entries, so row labels and column names are restored from the file.
employees = pd.read_json(path_or_buf='./json/split.json', orient='split')
print(employees)

      name    country
1  Michael     Canada
2     John        USA
3     Liza  Australia


In [23]:
# orient='records': the JSON is a list of {column: value} objects, one per
# row; no row labels are stored, so the default 0..n-1 index is used.
employees = pd.read_json(path_or_buf='./json/records.json', orient='records')
print(employees)

      name    country
0  Michael     Canada
1     John        USA
2     Liza  Australia


In [24]:
# orient='index': the JSON maps each row label to a {column: value} object.
employees = pd.read_json(path_or_buf='./json/index.json', orient='index')
print(employees)

      name    country
1  Michael     Canada
2     John        USA
3     Liza  Australia


In [25]:
# orient='columns': the JSON maps each column name to a {row label: value}
# object.
employees = pd.read_json(path_or_buf='./json/columns.json', orient='columns')
print(employees)

      name    country
1  Michael     Canada
2     John        USA
3     Liza  Australia


In [26]:
# orient='values': the JSON is a bare array of rows with no labels at all,
# so both the index and the column names fall back to 0..n-1.
employees = pd.read_json(path_or_buf='./json/values.json', orient='values')
print(employees)

         0          1
0  Michael     Canada
1     John        USA
2     Liza  Australia


In [27]:
# Column-oriented input: outer keys are column names, inner keys are the row
# labels (strings "1".."3").
data = {
    "name": {"1": "Michael", "2": "John", "3": "Liza"},
    "country": {"1": "Canada", "2": "USA", "3": "Australia"},
}
employees = pd.DataFrame.from_dict(data)
# Serialize in the 'split' layout, pretty-printed with 4-space indentation.
employees.to_json(path_or_buf='employees.json', orient='split', indent=4)

In [28]:
# Scrape the HTML table whose id attribute is 'table_id1' from the page.
# read_html returns a list of DataFrames, so take the first match and preview it.
# NOTE(review): needs network access and presumably an HTML parser such as
# lxml installed — confirm before relying on a clean re-run.
tmp = pd.read_html(
    "https://statisticstimes.com/tech/top-computer-languages.php",
    attrs={'id': 'table_id1'},
)
print(tmp[0].head())

   Jan 2024 Change Programming language    Share  Trends
0         1    NaN               Python   28.2 %  +0.5 %
1         2    NaN                 Java  15.73 %  -0.9 %
2         3    NaN           JavaScript   8.91 %  -0.6 %
3         4      ↑                C/C++    6.8 %  -0.0 %
4         5      ↓                   C#   6.67 %  -0.3 %


In [29]:
# Start with a three-element Series on the default 0..2 index.
data = pd.Series([1, 2, 3])
print(data, '=' * 50, sep='\n')
# Assigning to a label that does not exist yet enlarges the Series by one row.
data.loc[3] = 4
print(data)

0    1
1    2
2    3
dtype: int64
0    1
1    2
2    3
3    4
dtype: int64


In [30]:
# Column-oriented input: outer keys are column names, inner keys are the row
# labels (strings "1".."3").
data = {
    "name": {"1": "Michael", "2": "John", "3": "Liza"},
    "country": {"1": "Canada", "2": "USA", "3": "Australia"},
}
employees = pd.DataFrame.from_dict(data)
print(employees, '=' * 50, sep='\n')
# Assigning a list to a new column name appends that column; the list is
# matched to the rows by position, so it must have one value per row.
employees['age'] = [25, 32, 19]
print(employees)

      name    country
1  Michael     Canada
2     John        USA
3     Liza  Australia
      name    country  age
1  Michael     Canada   25
2     John        USA   32
3     Liza  Australia   19


In [31]:
# Show the class of the object built above.
employees_type = type(employees)
print(employees_type)

<class 'pandas.core.frame.DataFrame'>


In [32]:
# Column-oriented input: outer keys are column names, inner keys are the row
# labels (strings "1".."3").
data = {
    "name": {"1": "Michael", "2": "John", "3": "Liza"},
    "country": {"1": "Canada", "2": "USA", "3": "Australia"},
    "age": {"1": 25, "2": 32, "3": 19}
}

employees = pd.DataFrame(data)
print(employees)
print('=' * 50)

# The new row as a Series whose index names the columns it fills.
new_employee = pd.Series(
    ["Jhon", "Denmark", 23],
    index=["name", "country", "age"],
)

# Assigning to a label that does not exist yet appends a row. The label is the
# string "5" (not the integer 5) so the index stays homogeneous with the
# existing "1".."3" labels; a mixed str/int index cannot be sorted.
employees.loc["5"] = new_employee
print(employees)

      name    country  age
1  Michael     Canada   25
2     John        USA   32
3     Liza  Australia   19
      name    country  age
1  Michael     Canada   25
2     John        USA   32
3     Liza  Australia   19
5     Jhon    Denmark   23
