In [1]:
import re

import pandas as pd


def validate_cpr(text):
    """Report whether ``text`` contains a ten-digit run that passes the check.

    Every non-overlapping run of ten consecutive digits found in ``text`` is
    tested in order; the function returns as soon as one of them satisfies
    the condition.

    Parameters
    ----------
    text : str
        The value to scan. Anything that is not a ``str`` (for example
        ``None`` or a float NaN standing in for a missing value) is treated
        as containing no candidate number instead of raising ``TypeError``.

    Returns
    -------
    bool
        ``True`` if at least one ten-digit run satisfies the condition,
        ``False`` otherwise.
    """
    # re.findall only accepts strings; a missing value must not abort a
    # whole-column .apply().
    if not isinstance(text, str):
        return False
    # Raw string so that \d reaches the regex engine without relying on
    # Python tolerating an invalid escape sequence.
    for ten_digit_number in re.findall(r'\d{10}', text):
        # Whatever your condition is goes here
        if int(ten_digit_number) > 1_111_111_111:
            return True
    return False

In [2]:
# Approach 1
# Load into df, build a boolean mask, then select the matching rows with .loc
cpr_column = 'Linjeindhold for 1. fund'
df = pd.read_excel(
    'Downloads/CPR Check LJ RD(1).xlsx',
    # The spreadsheet column apparently holds both text and integers; read it as str
    dtype={cpr_column: str},
)
has_valid_cpr = df[cpr_column].apply(validate_cpr)
df.loc[has_valid_cpr].to_excel('cpr-records.xlsx')

In [3]:
# Approach 2
# Single expression with no intermediate variable: query() does the filtering instead of []
(
    pd.read_excel(
        'Downloads/CPR Check LJ RD(1).xlsx',
        # The spreadsheet column apparently holds both text and integers; read it as str
        dtype={'Linjeindhold for 1. fund': str},
    )
    .query('`Linjeindhold for 1. fund`.apply(@validate_cpr)')
    .to_excel('cpr-records.xlsx')
)

In [4]:
# Approach 3
# Same as A2, but first drops the rows that have no 10 digit pattern at all,
# so that validate_cpr only runs on the remaining candidates.
# Probably only worth it if you have a huge file to read
pd.read_excel(
    'Downloads/CPR Check LJ RD(1).xlsx',
    dtype={'Linjeindhold for 1. fund': str}  # Apparently this column is a mix of str and int in excel
).query(
    # Raw string with a doubled backslash: query() parses the quoted pattern
    # as a string literal of its own, so the regex it ends up with is \d{10}.
    # na=False counts missing cells as "no match" so the mask stays boolean.
    r'`Linjeindhold for 1. fund`.str.contains("\\d{10}", na=False)'
).query(
    # Kept as a separate query step on purpose: joining the two conditions
    # with & evaluates both on every row, so the pre-filter would save nothing.
    '`Linjeindhold for 1. fund`.apply(@validate_cpr)'
).to_excel(
    'cpr-records.xlsx'
)